mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-24 18:20:38 +00:00
[HIP] Fix device lib selection
Choose optimized device lib bitcode by fp options for performance. Reviewed by: Artem Belevich, Fangrui Song Differential Revision: https://reviews.llvm.org/D101654
This commit is contained in:
parent
1fcf9247de
commit
c58a6a6fb4
clang
include/clang
lib
test
@ -172,7 +172,8 @@ CODEGENOPT(NoInlineLineTables, 1, 0) ///< Whether debug info should contain
|
||||
CODEGENOPT(StackClashProtector, 1, 0) ///< Set when -fstack-clash-protection is enabled.
|
||||
CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled.
|
||||
CODEGENOPT(NullPointerIsValid , 1, 0) ///< Assume Null pointer deference is defined.
|
||||
CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
|
||||
CODEGENOPT(OpenCLCorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
|
||||
CODEGENOPT(HIPCorrectlyRoundedDivSqrt, 1, 1) ///< -fno-hip-fp32-correctly-rounded-divide-sqrt
|
||||
CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.
|
||||
CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.
|
||||
|
||||
|
@ -834,7 +834,7 @@ def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group
|
||||
HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
|
||||
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">,
|
||||
MarshallingInfoFlag<CodeGenOpts<"CorrectlyRoundedDivSqrt">>;
|
||||
MarshallingInfoFlag<CodeGenOpts<"OpenCLCorrectlyRoundedDivSqrt">>;
|
||||
def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">,
|
||||
MarshallingInfoFlag<CodeGenOpts<"UniformWGSize">>;
|
||||
@ -939,6 +939,13 @@ defm hip_new_launch_api : BoolFOption<"hip-new-launch-api",
|
||||
LangOpts<"HIPUseNewLaunchAPI">, DefaultFalse,
|
||||
PosFlag<SetTrue, [CC1Option], "Use">, NegFlag<SetFalse, [], "Don't use">,
|
||||
BothFlags<[], " new kernel launching API for HIP">>;
|
||||
defm hip_fp32_correctly_rounded_divide_sqrt : BoolFOption<"hip-fp32-correctly-rounded-divide-sqrt",
|
||||
CodeGenOpts<"HIPCorrectlyRoundedDivSqrt">, DefaultTrue,
|
||||
PosFlag<SetTrue, [], "Specify">,
|
||||
NegFlag<SetFalse, [CC1Option], "Don't specify">,
|
||||
BothFlags<[], " that single precision floating-point divide and sqrt used in "
|
||||
"the program source are correctly rounded (HIP device compilation only)">>,
|
||||
ShouldParseIf<hip.KeyPath>;
|
||||
defm gpu_allow_device_init : BoolFOption<"gpu-allow-device-init",
|
||||
LangOpts<"GPUAllowDeviceInit">, DefaultFalse,
|
||||
PosFlag<SetTrue, [CC1Option], "Allow">, NegFlag<SetFalse, [], "Don't allow">,
|
||||
|
@ -3216,8 +3216,10 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
|
||||
llvm::Value *Val;
|
||||
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
|
||||
Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
|
||||
if (CGF.getLangOpts().OpenCL &&
|
||||
!CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) {
|
||||
if ((CGF.getLangOpts().OpenCL &&
|
||||
!CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
|
||||
(CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice &&
|
||||
!CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
|
||||
// OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
|
||||
// OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
|
||||
// build option allows an application to specify that single precision
|
||||
|
@ -404,11 +404,17 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
|
||||
bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
|
||||
options::OPT_fno_gpu_flush_denormals_to_zero,
|
||||
getDefaultDenormsAreZeroForTarget(Kind));
|
||||
// TODO: Check standard C++ flags?
|
||||
bool FiniteOnly = false;
|
||||
bool UnsafeMathOpt = false;
|
||||
bool FastRelaxedMath = false;
|
||||
bool CorrectSqrt = true;
|
||||
bool FiniteOnly =
|
||||
DriverArgs.hasFlag(options::OPT_ffinite_math_only,
|
||||
options::OPT_fno_finite_math_only, false);
|
||||
bool UnsafeMathOpt =
|
||||
DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
|
||||
options::OPT_fno_unsafe_math_optimizations, false);
|
||||
bool FastRelaxedMath = DriverArgs.hasFlag(
|
||||
options::OPT_ffast_math, options::OPT_fno_fast_math, false);
|
||||
bool CorrectSqrt = DriverArgs.hasFlag(
|
||||
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
|
||||
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
|
||||
bool Wave64 = isWave64(DriverArgs, Kind);
|
||||
|
||||
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
|
||||
|
35
clang/test/CodeGenCUDA/correctly-rounded-div.cu
Normal file
35
clang/test/CodeGenCUDA/correctly-rounded-div.cu
Normal file
@ -0,0 +1,35 @@
|
||||
// RUN: %clang_cc1 %s -emit-llvm -o - -triple -amdgcn-amd-amdhsa \
|
||||
// RUN: -target-cpu gfx906 -fcuda-is-device -x hip \
|
||||
// RUN: | FileCheck --check-prefixes=COMMON,CRDIV %s
|
||||
// RUN: %clang_cc1 %s -emit-llvm -o - -triple -amdgcn-amd-amdhsa \
|
||||
// RUN: -target-cpu gfx906 -fcuda-is-device -x hip \
|
||||
// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt \
|
||||
// RUN: | FileCheck --check-prefixes=COMMON,NCRDIV %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
typedef __attribute__(( ext_vector_type(4) )) float float4;
|
||||
|
||||
// COMMON-LABEL: @_Z11spscalardiv
|
||||
// COMMON: fdiv{{.*}},
|
||||
// NCRDIV: !fpmath ![[MD:[0-9]+]]
|
||||
// CRDIV-NOT: !fpmath
|
||||
__device__ float spscalardiv(float a, float b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// COMMON-LABEL: @_Z11spvectordiv
|
||||
// COMMON: fdiv{{.*}},
|
||||
// NCRDIV: !fpmath ![[MD]]
|
||||
// CRDIV-NOT: !fpmath
|
||||
__device__ float4 spvectordiv(float4 a, float4 b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// COMMON-LABEL: @_Z11dpscalardiv
|
||||
// COMMON-NOT: !fpmath
|
||||
__device__ double dpscalardiv(double a, double b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// NCRDIV: ![[MD]] = !{float 2.500000e+00}
|
@ -113,6 +113,30 @@
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST
|
||||
|
||||
// Test -fast-math
|
||||
// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
|
||||
// RUN: -ffast-math --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck %s --check-prefixes=FAST
|
||||
|
||||
// Test -ffinite-math-only
|
||||
// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
|
||||
// RUN: -ffinite-math-only --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck %s --check-prefixes=FINITE
|
||||
|
||||
// Test -funsafe-math-optimizations
|
||||
// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
|
||||
// RUN: -funsafe-math-optimizations --rocm-path=%S/Inputs/rocm \
|
||||
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck %s --check-prefixes=UNSAFE
|
||||
|
||||
// Test -fno-hip-fp32-correctly-rounded-divide-sqrt
|
||||
// RUN: %clang -### -target x86_64-linux-gnu --offload-arch=gfx900 \
|
||||
// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt \
|
||||
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
|
||||
// RUN: 2>&1 | FileCheck %s --check-prefixes=DIVSQRT
|
||||
|
||||
// ALL-NOT: error:
|
||||
// ALL: {{"[^"]*clang[^"]*"}}
|
||||
// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc"
|
||||
@ -128,3 +152,23 @@
|
||||
// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc"
|
||||
// ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc"
|
||||
// INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc"
|
||||
|
||||
// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
|
||||
// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
|
||||
// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
|
||||
// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
|
||||
|
||||
// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
|
||||
// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
|
||||
// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
|
||||
// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
|
||||
|
||||
// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
|
||||
// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
|
||||
// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
|
||||
// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
|
||||
|
||||
// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
|
||||
// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
|
||||
// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
|
||||
// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_off.bc"
|
||||
|
Loading…
x
Reference in New Issue
Block a user