mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-01 07:10:37 +00:00
SVML support for log2
Although LLVM supports vectorization of loops containing log2, it did not support using the SVML implementation of it. This change adds that support, so that when clang is invoked with -fveclib=SVML, the appropriate SVML library log2 implementation is called. Follow-up to: https://reviews.llvm.org/D77114 Tests: Added unit tests to svml-calls.ll and svml-calls-finite.ll; they can be run with llvm-lit. Also created a simple C++ file that calls log2, built it with clang++, and inspected the final assembly output. Reviewed By: wenlei, craig.topper Differential Revision: https://reviews.llvm.org/D86730
This commit is contained in:
parent
b707c16b9c
commit
9ba54f747b
@ -245,6 +245,30 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
|
||||
TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
|
||||
|
||||
// SVML mappings for log2. Each row pairs a scalar function name (first
// argument) with an SVML vector routine (second argument) and a vector width
// (third argument); the width matches the numeric suffix of the SVML name.

// Double-precision log2 -> __svml_log2<N>, widths 2, 4 and 8.
TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2)
|
||||
TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4)
|
||||
TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8)
|
||||
|
||||
// Single-precision log2f -> __svml_log2f<N>, widths 4, 8 and 16.
TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4)
|
||||
TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8)
|
||||
TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16)
|
||||
|
||||
// __log2_finite is mapped to the same double-precision SVML routines as log2.
TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2)
|
||||
TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4)
|
||||
TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8)
|
||||
|
||||
// __log2f_finite is mapped to the same single-precision SVML routines as log2f.
TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4)
|
||||
TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8)
|
||||
TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16)
|
||||
|
||||
// The llvm.log2.f64 intrinsic is mapped to the double-precision SVML routines.
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8)
|
||||
|
||||
// The llvm.log2.f32 intrinsic is mapped to the single-precision SVML routines.
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
|
||||
TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
|
||||
|
||||
TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
|
||||
TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
|
||||
TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
|
||||
|
@ -243,3 +243,60 @@ for.end:
|
||||
!71 = distinct !{!71, !72, !73}
|
||||
!72 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!73 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
declare float @__log2f_finite(float) #0

; Finite-math variant of single-precision log2: the loop below calls
; __log2f_finite once per iteration, and the expectations require a 4-wide
; call to __svml_log2f4 in the output.
; CHECK-LABEL: @log2_f32
; CHECK: <4 x float> @__svml_log2f4
; CHECK: ret
define void @log2_f32(float* nocapture %varray) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call fast float @__log2f_finite(float %conv)
  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
  store float %call, float* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  ; Attach this test's own loop hints (!81) rather than borrowing the loop ID
  ; of another test in the file.
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81

for.end:                                          ; preds = %for.body
  ret void
}

; Loop ID for @log2_f32. The first operand is a self-reference, followed by
; the hints: vectorize width 4, vectorization enabled.
!81 = distinct !{!81, !82, !83}
!82 = !{!"llvm.loop.vectorize.width", i32 4}
!83 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
|
||||
declare double @__log2_finite(double) #0

; Finite-math variant of double-precision log2: the loop below calls
; __log2_finite once per iteration, and the expectations require a 4-wide
; call to __svml_log24 in the output.
; CHECK-LABEL: @log2_f64
; CHECK: <4 x double> @__svml_log24
; CHECK: ret
define void @log2_f64(double* nocapture %varray) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %tmp = trunc i64 %indvars.iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call fast double @__log2_finite(double %conv)
  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
  store double %call, double* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  ; Attach this test's own loop hints (!91) rather than borrowing the loop ID
  ; of another test in the file.
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91

for.end:                                          ; preds = %for.body
  ret void
}

; Loop ID for @log2_f64. The first operand is a self-reference, followed by
; the hints: vectorize width 4, vectorization enabled.
!91 = distinct !{!91, !92, !93}
!92 = !{!"llvm.loop.vectorize.width", i32 4}
!93 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
@ -28,6 +28,11 @@ declare float @logf(float) #0
|
||||
declare double @llvm.log.f64(double) #0
|
||||
declare float @llvm.log.f32(float) #0
|
||||
|
||||
; Scalar log2 callees in double and float precision: the named functions
; log2/log2f and the llvm.log2.* intrinsics.
declare double @log2(double) #0
|
||||
declare float @log2f(float) #0
|
||||
declare double @llvm.log2.f64(double) #0
|
||||
declare float @llvm.log2.f32(float) #0
|
||||
|
||||
declare double @exp2(double) #0
|
||||
declare float @exp2f(float) #0
|
||||
declare double @llvm.exp2.f64(double) #0
|
||||
@ -501,6 +506,98 @@ for.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Calls log2(double) on each index of a 1000-iteration loop and stores the
; result into %varray. The expectations below require a 4-wide call to
; __svml_log24 in the output.
define void @log2_f64(double* nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.i32 = trunc i64 %i to i32
  %x = sitofp i32 %i.i32 to double
  %y = tail call double @log2(double %x)
  %slot = getelementptr inbounds double, double* %varray, i64 %i
  store double %y, double* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
; Calls log2f(float) on each index of a 1000-iteration loop and stores the
; result into %varray. The expectations below require a 4-wide call to
; __svml_log2f4 in the output.
define void @log2_f32(float* nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.i32 = trunc i64 %i to i32
  %x = sitofp i32 %i.i32 to float
  %y = tail call float @log2f(float %x)
  %slot = getelementptr inbounds float, float* %varray, i64 %i
  store float %y, float* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
; Calls the llvm.log2.f64 intrinsic on each index of a 1000-iteration loop and
; stores the result into %varray. The expectations below require a 4-wide call
; to __svml_log24 in the output.
define void @log2_f64_intrinsic(double* nocapture %varray) {
; CHECK-LABEL: @log2_f64_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log24(<4 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.i32 = trunc i64 %i to i32
  %x = sitofp i32 %i.i32 to double
  %y = tail call double @llvm.log2.f64(double %x)
  %slot = getelementptr inbounds double, double* %varray, i64 %i
  store double %y, double* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
; Calls the llvm.log2.f32 intrinsic on each index of a 1000-iteration loop and
; stores the result into %varray. The expectations below require a 4-wide call
; to __svml_log2f4 in the output.
define void @log2_f32_intrinsic(float* nocapture %varray) {
; CHECK-LABEL: @log2_f32_intrinsic(
; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log2f4(<4 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %i.i32 = trunc i64 %i to i32
  %x = sitofp i32 %i.i32 to float
  %y = tail call float @llvm.log2.f32(float %x)
  %slot = getelementptr inbounds float, float* %varray, i64 %i
  store float %y, float* %slot, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
|
||||
|
||||
define void @exp2_f64(double* nocapture %varray) {
|
||||
; CHECK-LABEL: @exp2_f64(
|
||||
; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
|
||||
|
Loading…
Reference in New Issue
Block a user