mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-27 15:41:46 +00:00
Initial support for vectorization using Libmvec (GLIBC vector math library)
Differential Revision: https://reviews.llvm.org/D88154
This commit is contained in:
parent
04e42f6254
commit
57cdc52c4d
@@ -348,7 +348,7 @@ CODEGENOPT(CodeViewGHash, 1, 0)
|
||||
ENUM_CODEGENOPT(Inlining, InliningMethod, 2, NormalInlining)
|
||||
|
||||
// Vector functions library to use.
|
||||
-ENUM_CODEGENOPT(VecLib, VectorLibrary, 2, NoLibrary)
|
||||
+ENUM_CODEGENOPT(VecLib, VectorLibrary, 3, NoLibrary)
|
||||
|
||||
/// The default TLS model to use.
|
||||
ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel)
|
||||
|
@@ -54,11 +54,11 @@ public:
|
||||
enum VectorLibrary {
|
||||
NoLibrary, // Don't use any vector library.
|
||||
Accelerate, // Use the Accelerate framework.
|
||||
LIBMVEC, // GLIBC vector math library.
|
||||
MASSV, // IBM MASS vector library.
|
||||
SVML // Intel short vector math library.
|
||||
};
|
||||
|
||||
|
||||
enum ObjCDispatchMethodKind {
|
||||
Legacy = 0,
|
||||
NonLegacy = 1,
|
||||
|
@@ -1582,7 +1582,7 @@ def fno_experimental_new_pass_manager : Flag<["-"], "fno-experimental-new-pass-m
|
||||
Group<f_clang_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"Disables an experimental new pass manager in LLVM.">;
|
||||
def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,
|
||||
-HelpText<"Use the given vector functions library">, Values<"Accelerate,MASSV,SVML,none">;
|
||||
+HelpText<"Use the given vector functions library">, Values<"Accelerate,libmvec,MASSV,SVML,none">;
|
||||
def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
|
||||
Alias<flax_vector_conversions_EQ>, AliasArgs<["none"]>;
|
||||
def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group<f_Group>,
|
||||
|
@@ -371,6 +371,16 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
|
||||
case CodeGenOptions::Accelerate:
|
||||
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);
|
||||
break;
|
||||
case CodeGenOptions::LIBMVEC:
|
||||
switch(TargetTriple.getArch()) {
|
||||
default:
|
||||
break;
|
||||
case llvm::Triple::x86_64:
|
||||
TLII->addVectorizableFunctionsFromVecLib
|
||||
(TargetLibraryInfoImpl::LIBMVEC_X86);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case CodeGenOptions::MASSV:
|
||||
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV);
|
||||
break;
|
||||
|
@@ -749,6 +749,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
|
||||
StringRef Name = A->getValue();
|
||||
if (Name == "Accelerate")
|
||||
Opts.setVecLib(CodeGenOptions::Accelerate);
|
||||
else if (Name == "libmvec")
|
||||
Opts.setVecLib(CodeGenOptions::LIBMVEC);
|
||||
else if (Name == "MASSV")
|
||||
Opts.setVecLib(CodeGenOptions::MASSV);
|
||||
else if (Name == "SVML")
|
||||
|
@@ -73,6 +73,7 @@
|
||||
// FLTOALL-NEXT: thin
|
||||
// RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL
|
||||
// FVECLIBALL: Accelerate
|
||||
// FVECLIBALL-NEXT: libmvec
|
||||
// FVECLIBALL-NEXT: MASSV
|
||||
// FVECLIBALL-NEXT: none
|
||||
// FVECLIBALL-NEXT: SVML
|
||||
|
@@ -1,10 +1,12 @@
|
||||
// RUN: %clang -### -c -fveclib=none %s 2>&1 | FileCheck -check-prefix CHECK-NOLIB %s
|
||||
// RUN: %clang -### -c -fveclib=Accelerate %s 2>&1 | FileCheck -check-prefix CHECK-ACCELERATE %s
|
||||
// RUN: %clang -### -c -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-libmvec %s
|
||||
// RUN: %clang -### -c -fveclib=MASSV %s 2>&1 | FileCheck -check-prefix CHECK-MASSV %s
|
||||
// RUN: not %clang -c -fveclib=something %s 2>&1 | FileCheck -check-prefix CHECK-INVALID %s
|
||||
|
||||
// CHECK-NOLIB: "-fveclib=none"
|
||||
// CHECK-ACCELERATE: "-fveclib=Accelerate"
|
||||
// CHECK-libmvec: "-fveclib=libmvec"
|
||||
// CHECK-MASSV: "-fveclib=MASSV"
|
||||
|
||||
// CHECK-INVALID: error: invalid value 'something' in '-fveclib=something'
|
||||
|
@@ -88,6 +88,7 @@ public:
|
||||
enum VectorLibrary {
|
||||
NoLibrary, // Don't use any vector library.
|
||||
Accelerate, // Use Accelerate framework.
|
||||
LIBMVEC_X86,// GLIBC Vector Math library.
|
||||
MASSV, // IBM MASS vector library.
|
||||
SVML // Intel short vector math library.
|
||||
};
|
||||
|
@@ -62,6 +62,87 @@ TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
|
||||
TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
|
||||
|
||||
|
||||
#elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS)
|
||||
// GLIBC Vector math Functions
|
||||
|
||||
TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2)
|
||||
TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4)
|
||||
TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2)
|
||||
TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4)
|
||||
TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2)
|
||||
TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4)
|
||||
TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2)
|
||||
TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4)
|
||||
TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2)
|
||||
TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4)
|
||||
TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2)
|
||||
TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4)
|
||||
TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2)
|
||||
TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4)
|
||||
TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2)
|
||||
TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4)
|
||||
TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4)
|
||||
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8)
|
||||
|
||||
#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
|
||||
// IBM MASS library's vector Functions
|
||||
|
||||
@@ -339,6 +420,7 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", 16)
|
||||
|
||||
#undef TLI_DEFINE_VECFUNC
|
||||
#undef TLI_DEFINE_ACCELERATE_VECFUNCS
|
||||
#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
|
||||
#undef TLI_DEFINE_MASSV_VECFUNCS
|
||||
#undef TLI_DEFINE_SVML_VECFUNCS
|
||||
#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
|
||||
|
@@ -24,6 +24,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
|
||||
"No vector functions library"),
|
||||
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
|
||||
"Accelerate framework"),
|
||||
clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86",
|
||||
"GLIBC Vector Math library"),
|
||||
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
|
||||
"IBM MASS vector library"),
|
||||
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
|
||||
@@ -1559,6 +1561,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
|
||||
addVectorizableFunctions(VecFuncs);
|
||||
break;
|
||||
}
|
||||
case LIBMVEC_X86: {
|
||||
const VecDesc VecFuncs[] = {
|
||||
#define TLI_DEFINE_LIBMVEC_X86_VECFUNCS
|
||||
#include "llvm/Analysis/VecFuncs.def"
|
||||
};
|
||||
addVectorizableFunctions(VecFuncs);
|
||||
break;
|
||||
}
|
||||
case MASSV: {
|
||||
const VecDesc VecFuncs[] = {
|
||||
#define TLI_DEFINE_MASSV_VECFUNCS
|
||||
|
@@ -0,0 +1,373 @@
|
||||
; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @sin_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f64(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_sin(<2 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @sin(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!1 = distinct !{!1, !2, !3}
|
||||
!2 = !{!"llvm.loop.vectorize.width", i32 2}
|
||||
!3 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
define void @sin_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f32(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @sinf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!21 = distinct !{!21, !22, !23}
|
||||
!22 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!23 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @sin_f64_intrinsic(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f64_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_sin(<2 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @llvm.sin.f64(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!31 = distinct !{!31, !32, !33}
|
||||
!32 = !{!"llvm.loop.vectorize.width", i32 2}
|
||||
!33 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @sin_f32_intrinsic(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f32_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_sinf(<8 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @llvm.sin.f32(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!41 = distinct !{!41, !42, !43}
|
||||
!42 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!43 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f64(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_cos(<2 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @cos(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!51 = distinct !{!51, !52, !53}
|
||||
!52 = !{!"llvm.loop.vectorize.width", i32 2}
|
||||
!53 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f32(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @cosf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!61 = distinct !{!61, !62, !63}
|
||||
!62 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!63 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f64_intrinsic(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f64_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_cos(<2 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @llvm.cos.f64(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!71 = distinct !{!71, !72, !73}
|
||||
!72 = !{!"llvm.loop.vectorize.width", i32 2}
|
||||
!73 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f32_intrinsic(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f32_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_cosf(<8 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @llvm.cos.f32(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!81 = distinct !{!81, !82, !83}
|
||||
!82 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!83 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
define void @exp_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <8 x float> @_ZGVdN8v_expf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @expf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!91 = distinct !{!91, !92, !93}
|
||||
!92 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!93 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @exp_f32_intrin(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp_f32_intrin
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <8 x float> @_ZGVdN8v_expf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @llvm.exp.f32(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!101 = distinct !{!101, !102, !103}
|
||||
!102 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!103 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
define void @log_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @log_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <8 x float> @_ZGVdN8v_logf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @logf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!111 = distinct !{!111, !112, !113}
|
||||
!112 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!113 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
|
||||
; CHECK-LABEL: @pow_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <8 x float> @_ZGVdN8vv_powf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
|
||||
%tmp1 = load float, float* %arrayidx, align 4
|
||||
%tmp2 = tail call fast float @powf(float %conv, float %tmp1)
|
||||
%arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %tmp2, float* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!121 = distinct !{!121, !122, !123}
|
||||
!122 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!123 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) {
|
||||
; CHECK-LABEL: @pow_f32_intrin
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <8 x float> @_ZGVdN8vv_powf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
|
||||
%tmp1 = load float, float* %arrayidx, align 4
|
||||
%tmp2 = tail call fast float @llvm.pow.f32(float %conv, float %tmp1)
|
||||
%arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %tmp2, float* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!131 = distinct !{!131, !132, !133}
|
||||
!132 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!133 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
||||
declare double @sin(double) #0
|
||||
declare float @sinf(float) #0
|
||||
declare double @llvm.sin.f64(double) #0
|
||||
declare float @llvm.sin.f32(float) #0
|
||||
declare double @cos(double) #0
|
||||
declare float @cosf(float) #0
|
||||
declare double @llvm.cos.f64(double) #0
|
||||
declare float @llvm.cos.f32(float) #0
|
||||
declare float @expf(float) #0
|
||||
declare float @powf(float, float) #0
|
||||
declare float @llvm.exp.f32(float) #0
|
||||
declare float @logf(float) #0
|
||||
declare float @llvm.pow.f32(float, float) #0
|
@@ -0,0 +1,176 @@
|
||||
; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @exp_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4v___expf_finite
|
||||
; CHECK: ret
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @__expf_finite(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!1 = distinct !{!1, !2, !3}
|
||||
!2 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!3 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @exp_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp_f64
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x double> @_ZGVdN4v___exp_finite
|
||||
; CHECK: ret
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call fast double @__exp_finite(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!11 = distinct !{!11, !12, !13}
|
||||
!12 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!13 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @log_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @log_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4v___logf_finite
|
||||
; CHECK: ret
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @__logf_finite(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!21 = distinct !{!21, !22, !23}
|
||||
!22 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!23 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @log_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @log_f64
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x double> @_ZGVdN4v___log_finite
|
||||
; CHECK: ret
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call fast double @__log_finite(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!31 = distinct !{!31, !32, !33}
|
||||
!32 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!33 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
|
||||
; CHECK-LABEL: @pow_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4vv___powf_finite
|
||||
; CHECK: ret
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
|
||||
%tmp1 = load float, float* %arrayidx, align 4
|
||||
%tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1)
|
||||
%arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %tmp2, float* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!41 = distinct !{!41, !42, !43}
|
||||
!42 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!43 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
|
||||
; CHECK-LABEL: @pow_f64
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x double> @_ZGVdN4vv___pow_finite
|
||||
; CHECK: ret
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%arrayidx = getelementptr inbounds double, double* %exp, i64 %indvars.iv
|
||||
%tmp1 = load double, double* %arrayidx, align 4
|
||||
%tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1)
|
||||
%arrayidx2 = getelementptr inbounds double, double* %varray, i64 %indvars.iv
|
||||
store double %tmp2, double* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!51 = distinct !{!51, !52, !53}
|
||||
!52 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!53 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
declare float @__expf_finite(float) #0
|
||||
declare double @__exp_finite(double) #0
|
||||
declare float @__logf_finite(float) #0
|
||||
declare double @__log_finite(double) #0
|
||||
declare float @__powf_finite(float, float) #0
|
||||
declare double @__pow_finite(double, double) #0
|
373
llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
Normal file
@ -0,0 +1,373 @@
|
||||
; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @sin_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f64(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @sin(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!1 = distinct !{!1, !2, !3}
|
||||
!2 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!3 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
define void @sin_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f32(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @sinf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!21 = distinct !{!21, !22, !23}
|
||||
!22 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!23 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @sin_f64_intrinsic(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f64_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @llvm.sin.f64(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!31 = distinct !{!31, !32, !33}
|
||||
!32 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!33 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @sin_f32_intrinsic(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @sin_f32_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @llvm.sin.f32(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!41 = distinct !{!41, !42, !43}
|
||||
!42 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!43 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f64(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @cos(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!51 = distinct !{!51, !52, !53}
|
||||
!52 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!53 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f32(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @cosf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!61 = distinct !{!61, !62, !63}
|
||||
!62 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!63 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f64_intrinsic(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f64_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to double
|
||||
%call = tail call double @llvm.cos.f64(double %conv)
|
||||
%arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
|
||||
store double %call, double* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!71 = distinct !{!71, !72, !73}
|
||||
!72 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!73 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @cos_f32_intrinsic(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @cos_f32_intrinsic(
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]])
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%tmp = trunc i64 %iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call float @llvm.cos.f32(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
!81 = distinct !{!81, !82, !83}
|
||||
!82 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!83 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
define void @exp_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4v_expf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @expf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!91 = distinct !{!91, !92, !93}
|
||||
!92 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!93 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @exp_f32_intrin(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp_f32_intrin
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4v_expf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @llvm.exp.f32(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!101 = distinct !{!101, !102, !103}
|
||||
!102 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!103 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
define void @log_f32(float* nocapture %varray) {
|
||||
; CHECK-LABEL: @log_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4v_logf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%call = tail call fast float @logf(float %conv)
|
||||
%arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %call, float* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!111 = distinct !{!111, !112, !113}
|
||||
!112 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!113 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
|
||||
; CHECK-LABEL: @pow_f32
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4vv_powf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
|
||||
%tmp1 = load float, float* %arrayidx, align 4
|
||||
%tmp2 = tail call fast float @powf(float %conv, float %tmp1)
|
||||
%arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %tmp2, float* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!121 = distinct !{!121, !122, !123}
|
||||
!122 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!123 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) {
|
||||
; CHECK-LABEL: @pow_f32_intrin
|
||||
; CHECK-LABEL: vector.body
|
||||
; CHECK: <4 x float> @_ZGVbN4vv_powf
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%tmp = trunc i64 %indvars.iv to i32
|
||||
%conv = sitofp i32 %tmp to float
|
||||
%arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv
|
||||
%tmp1 = load float, float* %arrayidx, align 4
|
||||
%tmp2 = tail call fast float @llvm.pow.f32(float %conv, float %tmp1)
|
||||
%arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv
|
||||
store float %tmp2, float* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!131 = distinct !{!131, !132, !133}
|
||||
!132 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!133 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
||||
declare double @sin(double) #0
|
||||
declare float @sinf(float) #0
|
||||
declare double @llvm.sin.f64(double) #0
|
||||
declare float @llvm.sin.f32(float) #0
|
||||
declare double @cos(double) #0
|
||||
declare float @cosf(float) #0
|
||||
declare double @llvm.cos.f64(double) #0
|
||||
declare float @llvm.cos.f32(float) #0
|
||||
declare float @expf(float) #0
|
||||
declare float @powf(float, float) #0
|
||||
declare float @llvm.exp.f32(float) #0
|
||||
declare float @logf(float) #0
|
||||
declare float @llvm.pow.f32(float, float) #0
|
@ -3,6 +3,8 @@
|
||||
; RUN: opt -vector-library=MASSV -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV
|
||||
; RUN: opt -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV
|
||||
; RUN: opt -vector-library=Accelerate -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE
|
||||
; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86
|
||||
; RUN: opt -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86
|
||||
; RUN: opt -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
@ -21,6 +23,9 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_massv to i8*)
|
||||
; ACCELERATE-SAME: [1 x i8*] [
|
||||
; ACCELERATE-SAME: i8* bitcast (<4 x float> (<4 x float>)* @vlog10f to i8*)
|
||||
; LIBMVEC-X86-SAME: [2 x i8*] [
|
||||
; LIBMVEC-X86-SAME: i8* bitcast (<2 x double> (<2 x double>)* @_ZGVbN2v_sin to i8*),
|
||||
; LIBMVEC-X86-SAME: i8* bitcast (<4 x double> (<4 x double>)* @_ZGVdN4v_sin to i8*)
|
||||
; COMMON-SAME: ], section "llvm.metadata"
|
||||
|
||||
define double @sin_f64(double %in) {
|
||||
@ -28,6 +33,7 @@ define double @sin_f64(double %in) {
|
||||
; SVML: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
|
||||
; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
|
||||
; ACCELERATE: call double @sin(double %{{.*}})
|
||||
; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
|
||||
; No mapping of "sin" to a vector function for Accelerate.
|
||||
; ACCELERATE-NOT: _ZGV_LLVM_{{.*}}_sin({{.*}})
|
||||
%call = tail call double @sin(double %in)
|
||||
@ -39,10 +45,12 @@ declare double @sin(double) #0
|
||||
define float @call_llvm.log10.f32(float %in) {
|
||||
; COMMON-LABEL: @call_llvm.log10.f32(
|
||||
; SVML: call float @llvm.log10.f32(float %{{.*}})
|
||||
; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}})
|
||||
; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
|
||||
; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
|
||||
; No mapping of "llvm.log10.f32" to a vector function for SVML.
|
||||
; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
|
||||
; LIBMVEC-X86-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
|
||||
%call = tail call float @llvm.log10.f32(float %in)
|
||||
ret float %call
|
||||
}
|
||||
@ -62,3 +70,7 @@ attributes #0 = { nounwind readnone }
|
||||
|
||||
; ACCELERATE: attributes #[[LOG10]] = { "vector-function-abi-variant"=
|
||||
; ACCELERATE-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" }
|
||||
|
||||
; LIBMVEC-X86: attributes #[[SIN]] = { "vector-function-abi-variant"=
|
||||
; LIBMVEC-X86-SAME: "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin),
|
||||
; LIBMVEC-X86-SAME: _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" }
|
||||
|
Loading…
Reference in New Issue
Block a user