diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 6e7ef9cc1766..aa4886dc0703 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -1406,7 +1406,7 @@ static bool isValidFunctionInKernel(llvm::Function *F, bool AllowLibDevice) { return F->isIntrinsic() && (Name.startswith("llvm.sqrt") || Name.startswith("llvm.fabs") || - Name.startswith("llvm.copysign")); + Name.startswith("llvm.copysign") || Name.startswith("llvm.powi")); } /// Do not take `Function` as a subtree value. diff --git a/polly/test/GPGPU/intrinsic-copied-into-kernel.ll b/polly/test/GPGPU/intrinsic-copied-into-kernel.ll index 49f4b2b39c24..13df82f28b43 100644 --- a/polly/test/GPGPU/intrinsic-copied-into-kernel.ll +++ b/polly/test/GPGPU/intrinsic-copied-into-kernel.ll @@ -14,6 +14,7 @@ ; KERNEL-IR: %p_sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val_p_scalar_) ; KERNEL-IR: declare float @llvm.sqrt.f32(float) ; KERNEL-IR: declare float @llvm.fabs.f32(float) +; KERNEL-IR: declare float @llvm.powi.f32(float, i32) ; Check that kernel launch is generated in host IR. ; the declare would not be generated unless a call to a kernel exists. @@ -26,7 +27,8 @@ ; float tmp1 = sqrt(tmp1); ; float tmp2 = fabs(tmp2); ; float tmp3 = copysignf(tmp1, tmp2); -; B[i] = tmp3; +; float tmp4 = powi(tmp3, 2); +; B[i] = tmp4; ; } ; } @@ -51,8 +53,9 @@ for.body: ; preds = %for.body.lr.ph, %fo %sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val) %fabs = tail call float @llvm.fabs.f32(float %sqrt); %copysign = tail call float @llvm.copysign.f32(float %sqrt, float %fabs); + %powi = tail call float @llvm.powi.f32(float %copysign, i32 2); %B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv - store float %copysign, float* %B.arr.i, align 4 + store float %powi, float* %B.arr.i, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %wide.trip.count = zext i32 %N to i64 @@ -70,6 +73,7 @@ for.end: ; preds = %for.cond.for.end_cr declare float @llvm.sqrt.f32(float) #0 declare float @llvm.fabs.f32(float) #0 declare float @llvm.copysign.f32(float, float) #0 +declare float @llvm.powi.f32(float, i32) #0 attributes #0 = { nounwind readnone }