mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-13 22:00:14 +00:00
[GPGPU] Add llvm.powi to the libdevice supported functions
These intrinsics are used in COSMO. llvm-svn: 311324
This commit is contained in:
parent
03c2208d5f
commit
b09bd74da8
@ -1406,7 +1406,7 @@ static bool isValidFunctionInKernel(llvm::Function *F, bool AllowLibDevice) {
|
|||||||
|
|
||||||
return F->isIntrinsic() &&
|
return F->isIntrinsic() &&
|
||||||
(Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") ||
|
(Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") ||
|
||||||
Name.startswith("llvm.copysign"));
|
Name.startswith("llvm.copysign") || Name.startswith("llvm.powi"));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Do not take `Function` as a subtree value.
|
/// Do not take `Function` as a subtree value.
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
; KERNEL-IR: %p_sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val_p_scalar_)
|
; KERNEL-IR: %p_sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val_p_scalar_)
|
||||||
; KERNEL-IR: declare float @llvm.sqrt.f32(float)
|
; KERNEL-IR: declare float @llvm.sqrt.f32(float)
|
||||||
; KERNEL-IR: declare float @llvm.fabs.f32(float)
|
; KERNEL-IR: declare float @llvm.fabs.f32(float)
|
||||||
|
; KERNEL-IR: declare float @llvm.powi.f32(float, i32)
|
||||||
|
|
||||||
; Check that kernel launch is generated in host IR.
|
; Check that kernel launch is generated in host IR.
|
||||||
; the declare would not be generated unless a call to a kernel exists.
|
; the declare would not be generated unless a call to a kernel exists.
|
||||||
@ -26,7 +27,8 @@
|
|||||||
; float tmp1 = sqrt(tmp1);
|
; float tmp1 = sqrt(tmp1);
|
||||||
; float tmp2 = fabs(tmp2);
|
; float tmp2 = fabs(tmp2);
|
||||||
; float tmp3 = copysignf(tmp1, tmp2);
|
; float tmp3 = copysignf(tmp1, tmp2);
|
||||||
; B[i] = tmp3;
|
; float tmp4 = powi(tmp3, 2);
|
||||||
|
; B[i] = tmp4;
|
||||||
; }
|
; }
|
||||||
; }
|
; }
|
||||||
|
|
||||||
@ -51,8 +53,9 @@ for.body: ; preds = %for.body.lr.ph, %fo
|
|||||||
%sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val)
|
%sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val)
|
||||||
%fabs = tail call float @llvm.fabs.f32(float %sqrt);
|
%fabs = tail call float @llvm.fabs.f32(float %sqrt);
|
||||||
%copysign = tail call float @llvm.copysign.f32(float %sqrt, float %fabs);
|
%copysign = tail call float @llvm.copysign.f32(float %sqrt, float %fabs);
|
||||||
|
%powi = tail call float @llvm.powi.f32(float %copysign, i32 2);
|
||||||
%B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv
|
%B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv
|
||||||
store float %copysign, float* %B.arr.i, align 4
|
store float %powi, float* %B.arr.i, align 4
|
||||||
|
|
||||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||||
%wide.trip.count = zext i32 %N to i64
|
%wide.trip.count = zext i32 %N to i64
|
||||||
@ -70,6 +73,7 @@ for.end: ; preds = %for.cond.for.end_cr
|
|||||||
declare float @llvm.sqrt.f32(float) #0
|
declare float @llvm.sqrt.f32(float) #0
|
||||||
declare float @llvm.fabs.f32(float) #0
|
declare float @llvm.fabs.f32(float) #0
|
||||||
declare float @llvm.copysign.f32(float, float) #0
|
declare float @llvm.copysign.f32(float, float) #0
|
||||||
|
declare float @llvm.powi.f32(float, i32) #0
|
||||||
|
|
||||||
attributes #0 = { nounwind readnone }
|
attributes #0 = { nounwind readnone }
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user