[OpenMP] Introduce kernel environment

This patch introduces a per-kernel environment. Previously, flags such as the execution mode were set through global variables with names like `__kernel_name_exec_mode`. They are accessible on the host by reading the corresponding global variable, but not from the device. Besides, some assumptions, such as no nested parallelism, are not made on a per-kernel basis, which prevents us from applying per-kernel optimizations in the device runtime.

This is a combination and refinement of patch series D116908, D116909, and D116910.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D142569
This commit is contained in:
Shilei Tian 2023-04-22 20:45:00 -04:00
parent 6510163242
commit 35cfadfbe2
82 changed files with 8862 additions and 3386 deletions

View File

@ -781,7 +781,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
emitGenericVarsEpilog(CGF);
CGBuilderTy &Bld = CGF.Builder;
OMPBuilder.createTargetDeinit(Bld, IsSPMD);
OMPBuilder.createTargetDeinit(Bld);
}
void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@ -819,24 +819,6 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
IsInTTDRegion = false;
}
/// Create a unique global variable to indicate the execution mode of this
/// target region. The execution mode is either 'generic' or 'spmd', depending
/// on the target directive. This variable is picked up by the offload library
/// to set up the device appropriately before kernel launch. If the execution
/// mode is 'generic', the runtime reserves one warp for the master; otherwise,
/// all warps participate in parallel work.
///
/// The emitted global is a constant i8 named "<Name>_exec_mode" with weak-any
/// linkage and protected visibility.
///
/// \param CGM  Code generation module whose LLVM module receives the global.
/// \param Name Prefix of the global's name; "_exec_mode" is appended to it.
/// \param Mode true emits OMP_TGT_EXEC_MODE_SPMD, false emits
///             OMP_TGT_EXEC_MODE_GENERIC as the initializer.
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
bool Mode) {
auto *GVMode = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
llvm::GlobalValue::WeakAnyLinkage,
llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
: OMP_TGT_EXEC_MODE_GENERIC),
Twine(Name, "_exec_mode"));
GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);
// NOTE(review): nothing in this function references the global after creation;
// presumably it is added to the compiler-used list so it is not discarded as
// unused before the offload library reads it -- confirm.
CGM.addCompilerUsedGlobal(GVMode);
}
void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@ -853,8 +835,6 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
else
emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)

View File

@ -36,7 +36,7 @@ void test_math_long_long(long long x) {
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr
// CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_int_l9_kernel_environment to ptr))
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@ -54,7 +54,7 @@ void test_math_long_long(long long x) {
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[__SGN_ASCAST_I]], align 4
// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i32 [[XOR_I]], [[TMP5]]
// CHECK-NEXT: store i32 [[SUB_I]], ptr [[L1_ASCAST]], align 4
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
@ -71,7 +71,7 @@ void test_math_long_long(long long x) {
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr
// CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_long_l16_kernel_environment to ptr))
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@ -89,7 +89,7 @@ void test_math_long_long(long long x) {
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8
// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i64 [[XOR_I]], [[TMP5]]
// CHECK-NEXT: store i64 [[SUB_I]], ptr [[L1_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
@ -106,7 +106,7 @@ void test_math_long_long(long long x) {
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: [[L1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L1]] to ptr
// CHECK-NEXT: store i64 [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_math_long_long_l23_kernel_environment to ptr))
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@ -124,7 +124,7 @@ void test_math_long_long(long long x) {
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[__SGN_ASCAST_I]], align 8
// CHECK-NEXT: [[SUB_I:%.*]] = sub nsw i64 [[XOR_I]], [[TMP5]]
// CHECK-NEXT: store i64 [[SUB_I]], ptr [[L1_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void

View File

@ -37,7 +37,7 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr))
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@ -61,7 +61,7 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK: for.end:
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
//
//
@ -78,7 +78,7 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2, i1 false)
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr))
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@ -109,6 +109,6 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK-NEXT: ret void
// CHECK: omp.inner.for.end:
// CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
//

View File

@ -33,11 +33,11 @@ void write_to_aligned_array(int *a, int N) {
// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8
// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false)
// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_kernel_environment to ptr))
// CHECK-AMD-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-AMD-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-AMD: user_code.entry:
// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_CASTED_ASCAST]], align 4
// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
@ -45,7 +45,7 @@ void write_to_aligned_array(int *a, int N) {
// CHECK-AMD-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// CHECK-AMD-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
// CHECK-AMD-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR2:[0-9]+]]
// CHECK-AMD-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// CHECK-AMD-NEXT: call void @__kmpc_target_deinit()
// CHECK-AMD-NEXT: ret void
// CHECK-AMD: worker.exit:
// CHECK-AMD-NEXT: ret void

View File

@ -31,15 +31,15 @@ int maini1() {
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void

File diff suppressed because it is too large Load Diff

View File

@ -36,13 +36,13 @@ void test_ds(){
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_kernel_environment)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: store i32 10, ptr [[A]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
@ -56,7 +56,7 @@ void test_ds(){
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void

View File

@ -44,18 +44,18 @@ int main(int argc, char **argv) {
// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK4-NEXT: call void @__kmpc_target_deinit()
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
@ -360,18 +360,18 @@ int main(int argc, char **argv) {
// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4
// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment)
// CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK5: user_code.entry:
// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
// CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]]
// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK5-NEXT: call void @__kmpc_target_deinit()
// CHECK5-NEXT: ret void
// CHECK5: worker.exit:
// CHECK5-NEXT: ret void

View File

@ -29,13 +29,13 @@ int main() {
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -64,12 +64,12 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -105,13 +105,13 @@ int main() {
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -140,12 +140,12 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK2-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -40,16 +40,16 @@ int main() {
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -150,16 +150,16 @@ int main() {
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -81,18 +81,18 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0)
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -192,7 +192,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -213,7 +213,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -255,7 +255,7 @@ int bar(int n){
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -270,7 +270,7 @@ int bar(int n){
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -344,18 +344,18 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0)
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -455,7 +455,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -476,7 +476,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -518,7 +518,7 @@ int bar(int n){
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -533,7 +533,7 @@ int bar(int n){
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -41,12 +41,12 @@ int bar(int n){
// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_kernel_environment)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
@ -59,7 +59,7 @@ int bar(int n){
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void

View File

@ -153,17 +153,17 @@ void unreachable_call() {
// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8
// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -192,11 +192,11 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -207,7 +207,7 @@ void unreachable_call() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -221,7 +221,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -256,7 +256,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -300,7 +300,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8
// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1
// CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -330,7 +330,7 @@ void unreachable_call() {
// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -351,7 +351,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -374,7 +374,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -397,7 +397,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32
// CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -426,7 +426,7 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK1-SAME: () #[[ATTR4]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -435,7 +435,7 @@ void unreachable_call() {
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1: 1:
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
//
//
@ -449,7 +449,7 @@ void unreachable_call() {
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -465,7 +465,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -523,17 +523,17 @@ void unreachable_call() {
// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4
// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -562,11 +562,11 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK2-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -577,7 +577,7 @@ void unreachable_call() {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -591,7 +591,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -626,7 +626,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -670,7 +670,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8
// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1
// CHECK2-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -700,7 +700,7 @@ void unreachable_call() {
// CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -721,7 +721,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -744,7 +744,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -767,7 +767,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32
// CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -796,7 +796,7 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK2-SAME: () #[[ATTR4]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -805,7 +805,7 @@ void unreachable_call() {
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2: 1:
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
//
//
@ -819,7 +819,7 @@ void unreachable_call() {
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -835,7 +835,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -58,15 +58,15 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -103,7 +103,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -115,7 +115,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -159,15 +159,15 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -204,7 +204,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -216,7 +216,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -53,15 +53,15 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -100,7 +100,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -113,7 +113,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -157,15 +157,15 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -204,7 +204,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -217,7 +217,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -52,141 +52,17 @@ int bar(int n){
}
#endif
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK-NEXT: ret void
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
// CHECK-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
// CHECK-NEXT: ret void
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
// CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
// CHECK-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: ret void
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK45-64-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK45-64-NEXT: entry:
// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@ -209,7 +85,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -221,7 +97,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@ -257,7 +133,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -277,7 +153,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@ -316,13 +192,13 @@ int bar(int n){
// CHECK45-32-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK45-32-NEXT: entry:
// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@ -345,7 +221,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -357,7 +233,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@ -393,7 +269,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -413,7 +289,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@ -452,13 +328,13 @@ int bar(int n){
// CHECK45-32-EX-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@ -481,7 +357,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -493,7 +369,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@ -529,7 +405,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -549,7 +425,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@ -588,13 +464,13 @@ int bar(int n){
// CHECK-64-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -617,7 +493,7 @@ int bar(int n){
// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -629,7 +505,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -665,7 +541,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -685,7 +561,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -724,13 +600,13 @@ int bar(int n){
// CHECK-32-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -753,7 +629,7 @@ int bar(int n){
// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -765,7 +641,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -801,7 +677,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -821,7 +697,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -860,13 +736,13 @@ int bar(int n){
// CHECK-32-EX-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@ -889,7 +765,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -901,7 +777,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@ -937,7 +813,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -957,7 +833,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void

View File

@ -104,17 +104,17 @@ int bar(int n){
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
// CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8*
// CHECK-64-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i64 1)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -303,7 +303,7 @@ int bar(int n){
// CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -315,7 +315,7 @@ int bar(int n){
// CHECK-64-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8
// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i64 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -559,7 +559,7 @@ int bar(int n){
// CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -572,7 +572,7 @@ int bar(int n){
// CHECK-64-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8
// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i64 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -836,17 +836,17 @@ int bar(int n){
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8*
// CHECK-32-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i32 1)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -1035,7 +1035,7 @@ int bar(int n){
// CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -1047,7 +1047,7 @@ int bar(int n){
// CHECK-32-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4
// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i32 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -1291,7 +1291,7 @@ int bar(int n){
// CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -1304,7 +1304,7 @@ int bar(int n){
// CHECK-32-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i32 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -1568,17 +1568,17 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4
// CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8*
// CHECK-32-EX-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined to i8*), i8* null, i8** [[TMP5]], i32 1)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@ -1767,7 +1767,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -1779,7 +1779,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4
// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined to i8*), i8* null, i8** [[TMP7]], i32 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@ -2023,7 +2023,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(%struct.KernelEnvironmentTy* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -2036,7 +2036,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined to i8*), i8* null, i8** [[TMP8]], i32 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void

View File

@ -35,15 +35,15 @@ void test() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -527,7 +527,7 @@ void test() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -535,7 +535,7 @@ void test() {
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void

View File

@ -45,7 +45,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -58,7 +58,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -67,12 +67,12 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-64-SAME: () #[[ATTR0]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -84,7 +84,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -99,7 +99,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: if.end:
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
//
//
@ -108,7 +108,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -121,7 +121,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -130,12 +130,12 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-32-SAME: () #[[ATTR0]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -147,7 +147,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -162,6 +162,6 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: if.end:
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
//

View File

@ -72,7 +72,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -124,7 +124,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK45-64-NEXT: br label [[SIMD_IF_END]]
// CHECK45-64: simd.if.end:
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
//
//
@ -142,7 +142,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -198,7 +198,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK45-64-NEXT: br label [[SIMD_IF_END]]
// CHECK45-64: simd.if.end:
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
//
//
@ -211,7 +211,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -244,7 +244,7 @@ int bar(int n){
// CHECK45-64-NEXT: ret void
// CHECK45-64: omp.inner.for.end:
// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
//
//
@ -261,7 +261,7 @@ int bar(int n){
// CHECK45-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -299,7 +299,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
//
//
@ -317,7 +317,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -368,7 +368,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK45-32-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32: simd.if.end:
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
//
//
@ -386,7 +386,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -441,7 +441,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK45-32-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32: simd.if.end:
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
//
//
@ -454,7 +454,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -486,7 +486,7 @@ int bar(int n){
// CHECK45-32-NEXT: ret void
// CHECK45-32: omp.inner.for.end:
// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
//
//
@ -503,7 +503,7 @@ int bar(int n){
// CHECK45-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -540,7 +540,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
//
//
@ -558,7 +558,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -609,7 +609,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32-EX: simd.if.end:
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
//
//
@ -627,7 +627,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -682,7 +682,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32-EX: simd.if.end:
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
//
//
@ -695,7 +695,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -727,7 +727,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: omp.inner.for.end:
// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
//
//
@ -744,7 +744,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -781,7 +781,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
//
//
@ -799,7 +799,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -851,7 +851,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK-64-NEXT: br label [[SIMD_IF_END]]
// CHECK-64: simd.if.end:
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
//
//
@ -869,7 +869,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -925,7 +925,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK-64-NEXT: br label [[SIMD_IF_END]]
// CHECK-64: simd.if.end:
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
//
//
@ -938,7 +938,7 @@ int bar(int n){
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -971,7 +971,7 @@ int bar(int n){
// CHECK-64-NEXT: ret void
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
//
//
@ -988,7 +988,7 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -1026,7 +1026,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
//
//
@ -1044,7 +1044,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -1095,7 +1095,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK-32-NEXT: br label [[SIMD_IF_END]]
// CHECK-32: simd.if.end:
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
//
//
@ -1113,7 +1113,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -1168,7 +1168,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK-32-NEXT: br label [[SIMD_IF_END]]
// CHECK-32: simd.if.end:
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
//
//
@ -1181,7 +1181,7 @@ int bar(int n){
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -1213,7 +1213,7 @@ int bar(int n){
// CHECK-32-NEXT: ret void
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
//
//
@ -1230,7 +1230,7 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -1267,7 +1267,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
//
//
@ -1285,7 +1285,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -1336,7 +1336,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK-32-EX: simd.if.end:
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
//
//
@ -1354,7 +1354,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -1409,7 +1409,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK-32-EX: simd.if.end:
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
//
//
@ -1422,7 +1422,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -1454,7 +1454,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
//
//
@ -1471,7 +1471,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -1508,6 +1508,6 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
//

View File

@ -57,18 +57,18 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1
// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -95,7 +95,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -106,7 +106,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -133,7 +133,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -144,7 +144,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -209,18 +209,18 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1
// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -247,7 +247,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -258,7 +258,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -285,7 +285,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -296,7 +296,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -37,15 +37,15 @@ int bar(int n){
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -155,15 +155,15 @@ int bar(int n){
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -85,11 +85,11 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@ -99,7 +99,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -396,7 +396,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -407,7 +407,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -645,7 +645,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -653,7 +653,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -833,7 +833,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -844,7 +844,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -1059,7 +1059,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -1070,7 +1070,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -1371,7 +1371,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -1383,7 +1383,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -1635,11 +1635,11 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@ -1649,7 +1649,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -1946,7 +1946,7 @@ int bar(int n){
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -1957,7 +1957,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -2195,7 +2195,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -2203,7 +2203,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -2383,7 +2383,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -2394,7 +2394,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -2609,7 +2609,7 @@ int bar(int n){
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -2620,7 +2620,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -2916,7 +2916,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -2928,7 +2928,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -3180,11 +3180,11 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@ -3194,7 +3194,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -3484,7 +3484,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -3495,7 +3495,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -3727,7 +3727,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -3735,7 +3735,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -3909,7 +3909,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -3920,7 +3920,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -4128,7 +4128,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -4139,7 +4139,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -4443,7 +4443,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -4455,7 +4455,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void

View File

@ -43,11 +43,11 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
@ -57,7 +57,7 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -341,11 +341,11 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
@ -355,7 +355,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -73,11 +73,11 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@ -87,7 +87,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -408,7 +408,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -419,7 +419,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -681,7 +681,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -689,7 +689,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -883,7 +883,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -894,7 +894,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -1128,11 +1128,11 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@ -1142,7 +1142,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -1456,7 +1456,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -1467,7 +1467,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -1723,7 +1723,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -1731,7 +1731,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -1919,7 +1919,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -1930,7 +1930,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void

View File

@ -81,11 +81,11 @@ int bar(int n){
// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@ -95,7 +95,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@ -246,7 +246,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -257,7 +257,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@ -396,7 +396,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -404,7 +404,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@ -515,7 +515,7 @@ int bar(int n){
// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@ -526,7 +526,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@ -664,11 +664,11 @@ int bar(int n){
// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@ -678,7 +678,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@ -828,7 +828,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -839,7 +839,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@ -977,7 +977,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -985,7 +985,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@ -1095,7 +1095,7 @@ int bar(int n){
// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@ -1106,7 +1106,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@ -1242,11 +1242,11 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@ -1256,7 +1256,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@ -1406,7 +1406,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -1417,7 +1417,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@ -1555,7 +1555,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -1563,7 +1563,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@ -1673,7 +1673,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@ -1684,7 +1684,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@ -1820,11 +1820,11 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@ -1834,7 +1834,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -1985,7 +1985,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -1996,7 +1996,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -2135,7 +2135,7 @@ int bar(int n){
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -2143,7 +2143,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -2254,7 +2254,7 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@ -2265,7 +2265,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@ -2403,11 +2403,11 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@ -2417,7 +2417,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -2567,7 +2567,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -2578,7 +2578,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -2716,7 +2716,7 @@ int bar(int n){
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -2724,7 +2724,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -2834,7 +2834,7 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@ -2845,7 +2845,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@ -2981,11 +2981,11 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@ -2995,7 +2995,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@ -3145,7 +3145,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -3156,7 +3156,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@ -3294,7 +3294,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -3302,7 +3302,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@ -3412,7 +3412,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@ -3423,7 +3423,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void

View File

@ -83,19 +83,19 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -122,7 +122,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -134,7 +134,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -161,19 +161,19 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -200,7 +200,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -212,7 +212,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -243,19 +243,19 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -286,7 +286,7 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -298,7 +298,7 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -329,19 +329,19 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: call void @__kmpc_target_deinit()
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
@ -372,7 +372,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
@ -384,7 +384,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: call void @__kmpc_target_deinit()
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void

View File

@ -57,19 +57,19 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E_ADDR]], align 8
// CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -328,7 +328,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -344,7 +344,7 @@ int bar(int n){
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -677,7 +677,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@ -685,7 +685,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@ -1250,17 +1250,17 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
// CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -1519,7 +1519,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -1535,7 +1535,7 @@ int bar(int n){
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -1868,7 +1868,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@ -1876,7 +1876,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@ -2441,17 +2441,17 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
// CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -2710,7 +2710,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -2726,7 +2726,7 @@ int bar(int n){
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@ -3059,7 +3059,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@ -3067,7 +3067,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void

View File

@ -103,16 +103,16 @@ int main()
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void

View File

@ -84,9 +84,9 @@ void spmd(void) {
}
#pragma omp begin declare target device_type(nohost)
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
struct KernelEnvironmentTy;
__attribute__((weak))
extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target

View File

@ -40,9 +40,9 @@ void spmd(void) {
}
#pragma omp begin declare target device_type(nohost)
__attribute__((weak))
extern "C" int __kmpc_target_init(void *Ident, char Mode,
bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
struct KernelEnvironmentTy;
__attribute__((weak))
extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target

View File

@ -95,7 +95,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment), !dbg [[DBG47]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]]
// CHECK1: user_code.entry:
@ -113,7 +113,7 @@ int main() {
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG47]]
@ -308,11 +308,11 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment), !dbg [[DBG137]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]])
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]]
@ -325,8 +325,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG137]]
@ -517,11 +517,11 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment), !dbg [[DBG212]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]]
@ -530,8 +530,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG212]]

View File

@ -89,7 +89,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment), !dbg [[DBG41]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
// CHECK1: user_code.entry:
@ -110,7 +110,7 @@ int main() {
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]]
// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
@ -383,11 +383,11 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment), !dbg [[DBG146]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
@ -400,8 +400,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
@ -466,7 +466,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
@ -559,7 +559,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]]
//
//
@ -665,11 +665,11 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment), !dbg [[DBG236]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]])
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
@ -678,8 +678,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
@ -749,7 +749,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
@ -846,7 +846,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]]
//
//

View File

@ -1721,8 +1721,7 @@ public:
/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
void createTargetDeinit(const LocationDescription &Loc);
///}

View File

@ -96,6 +96,11 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP
Int64, Int64, Int32Arr3Ty, Int32Arr3Ty, Int32)
__OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
__OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
Int8, Int8, Int8)
__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false,
ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr)
#undef __OMP_STRUCT_TYPE
#undef OMP_STRUCT_TYPE
@ -452,8 +457,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)
/// OpenMP Device runtime functions
__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1)
__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8)
__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr)
__OMP_RTL(__kmpc_target_deinit, false, Void,)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
@ -1012,9 +1017,9 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs, SExt))
__OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt,
ParamAttrs(AttributeSet(), SExt, SExt))
ParamAttrs(AttributeSet()))
__OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(),
ParamAttrs(AttributeSet(), SExt))
ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
AttributeSet(), AttributeSet(), AttributeSet(),

View File

@ -22,6 +22,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
@ -3898,14 +3899,60 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachine =
ConstantInt::getBool(Int32->getContext(), !IsSPMD);
ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD);
ConstantInt *MayUseNestedParallelismVal =
ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true);
ConstantInt *DebugIndentionLevelVal =
ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0);
// We need to strip the trailing "_debug__" marker (a suffix, despite the
// variable name below) to get the correct kernel name.
Function *Kernel = Builder.GetInsertBlock()->getParent();
StringRef KernelName = Kernel->getName();
const std::string DebugPrefix = "_debug__";
if (KernelName.ends_with(DebugPrefix))
KernelName = KernelName.drop_back(DebugPrefix.length());
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);
const DataLayout &DL = Fn->getParent()->getDataLayout();
CallInst *ThreadKind = Builder.CreateCall(
Fn, {Ident, IsSPMDVal, UseGenericStateMachine});
Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
Constant *DynamicEnvironmentInitializer =
ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal});
Constant *DynamicEnvironmentGV = new GlobalVariable(
M, DynamicEnvironment, /* IsConstant */ false,
GlobalValue::InternalLinkage, DynamicEnvironmentInitializer,
DynamicEnvironmentName,
/* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal,
DL.getDefaultGlobalsAddressSpace());
if (DynamicEnvironmentGV->getType() != DynamicEnvironmentPtr)
DynamicEnvironmentGV = ConstantExpr::getAddrSpaceCast(
DynamicEnvironmentGV, DynamicEnvironmentPtr);
Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get(
ConfigurationEnvironment, {
UseGenericStateMachineVal,
MayUseNestedParallelismVal,
IsSPMDVal,
});
Constant *KernelEnvironmentInitializer = ConstantStruct::get(
KernelEnvironment, {
ConfigurationEnvironmentInitializer,
Ident,
DynamicEnvironmentGV,
});
Twine KernelEnvironmentName = KernelName + "_kernel_environment";
Constant *KernelEnvironmentGV = new GlobalVariable(
M, KernelEnvironment, /* IsConstant */ true, GlobalValue::ExternalLinkage,
KernelEnvironmentInitializer, KernelEnvironmentName,
/* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal,
DL.getDefaultGlobalsAddressSpace());
if (KernelEnvironmentGV->getType() != KernelEnvironmentPtr)
KernelEnvironmentGV = ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV,
KernelEnvironmentPtr);
CallInst *ThreadKind = Builder.CreateCall(Fn, {KernelEnvironmentGV});
Value *ExecUserCode = Builder.CreateICmpEQ(
ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
@ -3938,22 +3985,14 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
}
void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
bool IsSPMD) {
void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc) {
if (!updateToLocation(Loc))
return;
uint32_t SrcLocStrSize;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
Builder.CreateCall(Fn, {Ident, IsSPMDVal});
Builder.CreateCall(Fn, {});
}
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(

View File

@ -31,6 +31,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
@ -178,6 +179,80 @@ STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif
// Helpers to read the constant kernel environment that is passed as the first
// argument of the kernel init call. The commented-out structs below mirror the
// layout that the index constants and getters in this namespace rely on.
namespace KernelInfo {
// struct ConfigurationEnvironmentTy {
// uint8_t UseGenericStateMachine;
// uint8_t MayUseNestedParallelism;
// llvm::omp::OMPTgtExecModeFlags ExecMode;
// };
// struct DynamicEnvironmentTy {
// uint16_t DebugIndentionLevel;
// };
// struct KernelEnvironmentTy {
// ConfigurationEnvironmentTy Configuration;
// IdentTy *Ident;
// DynamicEnvironmentTy *DynamicEnv;
// };
// Field indices into KernelEnvironmentTy. Each use defines a constant named
// <MEMBER>Idx (ConfigurationIdx, IdentIdx). No index is defined here for the
// DynamicEnv member.
#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \
constexpr const unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_IDX(Configuration, 0)
KERNEL_ENVIRONMENT_IDX(Ident, 1)
#undef KERNEL_ENVIRONMENT_IDX
// Field indices into ConfigurationEnvironmentTy, again named <MEMBER>Idx.
#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \
constexpr const unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2)
#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX
// Generates get<MEMBER>FromKernelEnvironment(KernelEnvC), which returns the
// top-level element at <MEMBER>Idx of the kernel environment constant. The
// element is converted with cast<>, so it is expected to be a RETURNTYPE.
#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE) \
RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \
return cast<RETURNTYPE>(KernelEnvC->getAggregateElement(MEMBER##Idx)); \
}
KERNEL_ENVIRONMENT_GETTER(Ident, Constant)
KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)
#undef KERNEL_ENVIRONMENT_GETTER
// Generates get<MEMBER>FromKernelEnvironment(KernelEnvC) for members of the
// nested configuration struct. The element is converted with dyn_cast<>, so
// the result is null when the element is not a ConstantInt; callers that
// dereference the result unconditionally rely on it being a ConstantInt.
#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER) \
ConstantInt *get##MEMBER##FromKernelEnvironment( \
ConstantStruct *KernelEnvC) { \
ConstantStruct *ConfigC = \
getConfigurationFromKernelEnvironment(KernelEnvC); \
return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(MEMBER##Idx)); \
}
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER
// Returns the global variable passed as argument 0 of the kernel init call,
// looking through pointer casts. Uses cast<>, so the stripped operand is
// expected to be a GlobalVariable.
GlobalVariable *
getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
constexpr const int InitKernelEnvironmentArgNo = 0;
return cast<GlobalVariable>(
KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
->stripPointerCasts());
}
// Returns the initializer of the kernel environment global referenced by the
// kernel init call. Uses cast<>, so the initializer is expected to be a
// ConstantStruct.
ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) {
GlobalVariable *KernelEnvGV =
getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
return cast<ConstantStruct>(KernelEnvGV->getInitializer());
}
} // namespace KernelInfo
namespace {
struct AAHeapToShared;
@ -610,6 +685,10 @@ struct KernelInfoState : AbstractState {
/// one we abort as the kernel is malformed.
CallBase *KernelInitCB = nullptr;
/// The constant kernel environment as taken from and passed to
/// __kmpc_target_init.
ConstantStruct *KernelEnvC = nullptr;
/// The __kmpc_target_deinit call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
CallBase *KernelDeinitCB = nullptr;
@ -714,6 +793,12 @@ struct KernelInfoState : AbstractState {
"assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
if (KIS.KernelEnvC) {
if (KernelEnvC && KernelEnvC != KIS.KernelEnvC)
llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
"assumptions.");
KernelEnvC = KIS.KernelEnvC;
}
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
@ -2780,9 +2865,11 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
const int InitModeArgNo = 1;
auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
ConstantStruct *KernelEnvC =
KernelInfo::getKernelEnvironementFromKernelInitCB(CB);
ConstantInt *ExecModeC =
KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC;
}
if (C->isZero()) {
@ -3469,6 +3556,29 @@ struct AAKernelInfoFunction : AAKernelInfo {
return GuardedInstructions;
}
/// Replace the configuration member of the tracked kernel environment
/// constant (KernelEnvC) with \p ConfigC. This only updates the KernelEnvC
/// member; it does not touch the initializer of any global variable.
void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) {
// Build a new constant with ConfigC inserted at the configuration index.
Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction(
KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx});
assert(NewKernelEnvC && "Failed to create new kernel environment");
KernelEnvC = cast<ConstantStruct>(NewKernelEnvC);
}
// Generates set<MEMBER>OfKernelEnvironment(NewVal) for each configuration
// member. The setter reads the configuration struct out of KernelEnvC, builds
// a new configuration constant with NewVal at KernelInfo::<MEMBER>Idx, and
// stores it through setConfigurationOfKernelEnvironment.
#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER) \
void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \
ConstantStruct *ConfigC = \
KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \
Constant *NewConfigC = ConstantFoldInsertValueInstruction( \
ConfigC, NewVal, {KernelInfo::MEMBER##Idx}); \
assert(NewConfigC && "Failed to create new configuration environment"); \
setConfigurationOfKernelEnvironment(cast<ConstantStruct>(NewConfigC)); \
}
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode)
#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// This is a high-level transform that might change the constant arguments
@ -3517,61 +3627,52 @@ struct AAKernelInfoFunction : AAKernelInfo {
ReachingKernelEntries.insert(Fn);
IsKernelEntry = true;
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
// knows the constant arguments to __kmpc_target_init and
// __kmpc_target_deinit might actually change.
KernelEnvC =
KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
GlobalVariable *KernelEnvGV =
KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> std::optional<Value *> {
return nullptr;
Attributor::GlobalVariableSimplifictionCallbackTy
KernelConfigurationSimplifyCB =
[&](const GlobalVariable &GV, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> std::optional<Constant *> {
return KernelEnvC;
};
Attributor::SimplifictionCallbackTy ModeSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> std::optional<Value *> {
// IRP represents the "SPMDCompatibilityTracker" argument of an
// __kmpc_target_init or
// __kmpc_target_deinit call. We will answer this one with the internal
// state.
if (!SPMDCompatibilityTracker.isValidState())
return nullptr;
if (!SPMDCompatibilityTracker.isAtFixpoint()) {
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
UsedAssumedInformation = true;
} else {
UsedAssumedInformation = false;
}
auto *Val = ConstantInt::getSigned(
IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
: OMP_TGT_EXEC_MODE_GENERIC);
return Val;
};
constexpr const int InitModeArgNo = 1;
constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
ModeSimplifyCB);
A.registerGlobalVariableSimplificationCallback(
*KernelEnvGV, KernelConfigurationSimplifyCB);
// Check if we know we are in SPMD-mode already.
ConstantInt *ModeArg =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
ConstantInt *ExecModeC =
KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
ConstantInt *AssumedExecModeC = ConstantInt::get(
ExecModeC->getType(),
ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD);
if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
// This is a generic region but SPMDization is disabled so stop tracking.
else if (DisableOpenMPOptSPMDization)
// This is a generic region but SPMDization is disabled so stop
// tracking.
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
else
setExecModeOfKernelEnvironment(AssumedExecModeC);
ConstantInt *MayUseNestedParallelismC =
KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
MayUseNestedParallelismC->getType(), NestedParallelism);
setMayUseNestedParallelismOfKernelEnvironment(
AssumedMayUseNestedParallelismC);
if (!DisableOpenMPOptStateMachineRewrite) {
ConstantInt *UseGenericStateMachineC =
KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
KernelEnvC);
ConstantInt *AssumedUseGenericStateMachineC =
ConstantInt::get(UseGenericStateMachineC->getType(), false);
setUseGenericStateMachineOfKernelEnvironment(
AssumedUseGenericStateMachineC);
}
// Register virtual uses of functions we might need to preserve.
auto RegisterVirtualUse = [&](RuntimeFunction RFKind,
@ -3672,21 +3773,21 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!KernelInitCB || !KernelDeinitCB)
return ChangeStatus::UNCHANGED;
/// Insert nested Parallelism global variable
Function *Kernel = getAnchorScope();
Module &M = *Kernel->getParent();
Type *Int8Ty = Type::getInt8Ty(M.getContext());
new GlobalVariable(M, Int8Ty, /* isConstant */ true,
GlobalValue::WeakAnyLinkage,
ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0),
Kernel->getName() + "_nested_parallelism");
ChangeStatus Changed = ChangeStatus::UNCHANGED;
// If we can we change the execution mode to SPMD-mode otherwise we build a
// custom state machine.
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (!changeToSPMDMode(A, Changed)) {
if (!KernelInitCB->getCalledFunction()->isDeclaration())
return buildCustomStateMachine(A);
Changed |= buildCustomStateMachine(A);
}
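// Finally, write the updated KernelEnvC back as the initializer of the
// kernel environment global if it differs from the current one.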
GlobalVariable *KernelEnvGV =
KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
if (KernelEnvGV->getInitializer() != KernelEnvC) {
KernelEnvGV->setInitializer(KernelEnvC);
Changed = ChangeStatus::CHANGED;
}
return Changed;
@ -3756,14 +3857,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Find escaping outputs from the guarded region to outside users and
// broadcast their values to them.
for (Instruction &I : *RegionStartBB) {
SmallPtrSet<Instruction *, 4> OutsideUsers;
for (User *Usr : I.users()) {
Instruction &UsrI = *cast<Instruction>(Usr);
SmallVector<Use *, 4> OutsideUses;
for (Use &U : I.uses()) {
Instruction &UsrI = *cast<Instruction>(U.getUser());
if (UsrI.getParent() != RegionStartBB)
OutsideUsers.insert(&UsrI);
OutsideUses.push_back(&U);
}
if (OutsideUsers.empty())
if (OutsideUses.empty())
continue;
HasBroadcastValues = true;
@ -3786,8 +3887,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
RegionBarrierBB->getTerminator());
// Emit a load instruction and replace uses of the output value.
for (Instruction *UsrI : OutsideUsers)
UsrI->replaceUsesOfWith(&I, LoadI);
for (Use *U : OutsideUses)
A.changeUseAfterManifest(*U, *LoadI);
}
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@ -4014,16 +4115,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
// Check if the kernel is already in SPMD mode, if so, return success.
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
// Set the global exec mode flag to indicate SPMD-Generic mode.
assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
"ExecMode is not an integer!");
const int8_t ExecModeVal =
cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
ConstantStruct *ExistingKernelEnvC =
KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
auto *ExecModeC =
KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
const int8_t ExecModeVal = ExecModeC->getSExtValue();
if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
return true;
@ -4041,27 +4137,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
// kernel is executed in.
assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
"Initially non-SPMD kernel has SPMD exec mode!");
ExecMode->setInitializer(
ConstantInt::get(ExecMode->getInitializer()->getType(),
ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
const int InitModeArgNo = 1;
const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
auto &Ctx = getAnchorValue().getContext();
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
*ConstantInt::getBool(Ctx, false));
A.changeUseAfterManifest(
KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
setExecModeOfKernelEnvironment(ConstantInt::get(
ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
++NumOpenMPTargetRegionKernelsSPMD;
@ -4088,30 +4165,29 @@ struct AAKernelInfoFunction : AAKernelInfo {
OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
return ChangeStatus::UNCHANGED;
const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
ConstantStruct *ExistingKernelEnvC =
KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
// Check if the current configuration is non-SPMD and generic state machine.
// If we already have SPMD mode or a custom state machine we do not need to
// go any further. If it is anything but a constant something is weird and
// we give up.
ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
ConstantInt *Mode =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
ConstantInt *UseStateMachineC =
KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
ExistingKernelEnvC);
ConstantInt *ModeC =
KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
(Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
if (UseStateMachineC->isZero() ||
(ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
return ChangeStatus::UNCHANGED;
// If not SPMD mode, indicate we use a custom state machine now.
auto &Ctx = getAnchorValue().getContext();
auto *FalseVal = ConstantInt::getBool(Ctx, false);
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
setUseGenericStateMachineOfKernelEnvironment(
ConstantInt::get(UseStateMachineC->getType(), false));
// If we don't actually need a state machine we are done here. This can
// happen if there simply are no parallel regions. In the resulting kernel
@ -4190,6 +4266,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// UserCodeEntryBB: // user code
// __kmpc_target_deinit(...)
//
auto &Ctx = getAnchorValue().getContext();
Function *Kernel = getAssociatedFunction();
assert(Kernel && "Expected an associated function!");
@ -4272,7 +4349,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
StateMachineBeginBB->end()),
DLoc));
Value *Ident = KernelInitCB->getArgOperand(0);
Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC);
Value *GTid = KernelInitCB;
FunctionCallee BarrierFn =
@ -4402,6 +4479,46 @@ struct AAKernelInfoFunction : AAKernelInfo {
ChangeStatus updateImpl(Attributor &A) override {
KernelInfoState StateBefore = getState();
// When we leave this function this RAII will make sure the member
// KernelEnvC is updated properly depending on the state. That member is
// used for simplification of values and needs to be up to date at all
// times.
struct UpdateKernelEnvCRAII {
AAKernelInfoFunction &AA;
UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {}
~UpdateKernelEnvCRAII() {
// Nothing to keep in sync if no kernel environment is tracked.
if (!AA.KernelEnvC)
return;
// The environment currently stored in the IR, i.e. the initializer of
// the global passed to the kernel init call.
ConstantStruct *ExistingKernelEnvC =
KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB);
// If the whole state is invalid, fall back to the environment from the
// IR.
if (!AA.isValidState()) {
AA.KernelEnvC = ExistingKernelEnvC;
return;
}
// If the known parallel regions state is invalid, restore the
// UseGenericStateMachine value from the IR.
if (!AA.ReachedKnownParallelRegions.isValidState())
AA.setUseGenericStateMachineOfKernelEnvironment(
KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
ExistingKernelEnvC));
// If the SPMD compatibility state is invalid, restore the ExecMode value
// from the IR.
if (!AA.SPMDCompatibilityTracker.isValidState())
AA.setExecModeOfKernelEnvironment(
KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC));
// Always refresh MayUseNestedParallelism from the current
// NestedParallelism flag; the existing member is read only to obtain its
// integer type.
ConstantInt *MayUseNestedParallelismC =
KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(
AA.KernelEnvC);
ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get(
MayUseNestedParallelismC->getType(), AA.NestedParallelism);
AA.setMayUseNestedParallelismOfKernelEnvironment(
NewMayUseNestedParallelismC);
}
} RAII(*this);
// Callback to check a read/write instruction.
auto CheckRWInst = [&](Instruction &I) {
// We handle calls later.

View File

@ -7,13 +7,13 @@
@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
%1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
%1 = tail call i32 @__kmpc_target_init(ptr null)
ret void
}
define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) {
define internal i32 @__kmpc_target_init(ptr %0) {
store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
%4 = call i1 @__kmpc_kernel_parallel()
%2 = call i1 @__kmpc_kernel_parallel()
ret i32 0
}
@ -29,13 +29,13 @@ define internal i1 @__kmpc_kernel_parallel() {
; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
;.
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null)
; CHECK-NEXT: ret void
;
;
; CHECK: Function Attrs: norecurse nosync nounwind memory(write)
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: ret i32 0
;
;.

View File

@ -310,7 +310,7 @@ entry:
define weak_odr void @t3() {
; CHECK-LABEL: define {{[^@]+}}@t3() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noalias nocapture noundef align 4294967296 null, i8 noundef 0, i1 noundef false, i1 noundef false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noalias nocapture noundef align 4294967296 null)
; CHECK-NEXT: br label [[USER_CODE_ENTRY:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
@ -321,7 +321,7 @@ define weak_odr void @t3() {
; CHECK-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr null, i8 0, i1 false, i1 false)
%0 = call i32 @__kmpc_target_init(ptr null)
br label %user_code.entry
user_code.entry: ; preds = %entry
@ -335,7 +335,7 @@ for.body: ; preds = %for.cond
ret void
}
declare i32 @__kmpc_target_init(ptr, i8, i1, i1)
declare i32 @__kmpc_target_init(ptr)
define %S.2 @t3.helper() {
; CHECK-LABEL: define {{[^@]+}}@t3.helper() {

View File

@ -736,9 +736,9 @@ declare i32 @__kmpc_shuffle_int32(i32, i16, i16);
declare i64 @__kmpc_shuffle_int64(i64, i16, i16);
declare void @__kmpc_target_deinit(ptr, i8);
declare void @__kmpc_target_deinit();
declare i32 @__kmpc_target_init(ptr, i8, i1);
declare i32 @__kmpc_target_init(ptr);
declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32);
@ -1389,10 +1389,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; CHECK: declare i64 @__kmpc_shuffle_int64(i64, i16, i16)
; CHECK-NOT: Function Attrs
; CHECK: declare void @__kmpc_target_deinit(ptr, i8)
; CHECK: declare void @__kmpc_target_deinit()
; CHECK-NOT: Function Attrs
; CHECK: declare i32 @__kmpc_target_init(ptr, i8, i1)
; CHECK: declare i32 @__kmpc_target_init(ptr)
; CHECK-NOT: Function Attrs
; CHECK: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
@ -2037,10 +2037,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; OPTIMISTIC: declare i64 @__kmpc_shuffle_int64(i64, i16, i16)
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare void @__kmpc_target_deinit(ptr, i8)
; OPTIMISTIC: declare void @__kmpc_target_deinit()
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr, i8, i1)
; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr)
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
@ -2701,7 +2701,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; EXT: declare void @__kmpc_target_deinit(ptr, i8 signext)
; EXT-NOT: Function Attrs
; EXT: declare signext i32 @__kmpc_target_init(ptr, i8 signext, i1 signext)
; EXT: declare signext i32 @__kmpc_target_init(ptr)
; EXT-NOT: Function Attrs
; EXT: declare void @__tgt_interop_destroy(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext)

View File

@ -1,19 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals
; RUN: opt < %s -S -passes=openmp-opt -openmp-opt-inline-device | FileCheck %s
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata"
@G = external global i8
; Function Attrs: norecurse nounwind
@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
; Function Attrs: convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
;.
define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK: Function Attrs: norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_environment)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@ -24,13 +32,13 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: store i8 1, ptr @G, align 1
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr @kernel_environment)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@ -42,7 +50,7 @@ user_code.entry: ; preds = %entry
%isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
store i8 %isSPMD, ptr @G
call void @bar() #2
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
worker.exit: ; preds = %entry
@ -51,9 +59,9 @@ worker.exit: ; preds = %entry
declare i8 @__kmpc_is_spmd_exec_mode()
declare i32 @__kmpc_target_init(ptr, i8, i1)
declare i32 @__kmpc_target_init(ptr)
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
; Function Attrs: convergent nounwind
define hidden void @bar() #1 {
@ -83,3 +91,17 @@ attributes #2 = { convergent }
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{i32 7, !"frame-pointer", i32 2}
!7 = !{!"clang version 14.0.0"}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; CHECK: [[META7:![0-9]+]] = !{!"clang version 14.0.0"}
;.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -36,6 +36,8 @@ target triple = "nvptx64"
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@ -43,24 +45,24 @@ target triple = "nvptx64"
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
@4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
@6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
@8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1
@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
@10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
@12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1
@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
@G = external global i32
@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
; Function Attrs: convergent norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
@ -75,12 +77,12 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
call void @unknown() #6, !dbg !27
call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
call void @__kmpc_target_deinit() #3, !dbg !28
br label %common.ret
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr) {
ret i32 0
}
@ -99,13 +101,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
declare void @__kmpc_target_deinit() local_unnamed_addr
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
@ -128,7 +130,7 @@ user_code.entry: ; preds = %entry
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
call void @no_openmp()
call void @no_parallelism()
call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
call void @__kmpc_target_deinit() #3, !dbg !46
br label %common.ret
}

View File

@ -5,12 +5,13 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, ptr @0 }, align 8
@__omp_offloading_50_a3e09bf8_foo_l2_exec_mode = weak constant i8 0
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata"
@__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
declare void @use(i32)
@ -18,37 +19,37 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 false)
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @2)
%2 = call i32 @__kmpc_global_thread_num(ptr @2)
call void @__kmpc_target_deinit(ptr @1, i8 2)
call void @__kmpc_target_deinit()
ret void
worker.exit: ; preds = %entry
ret void
}
declare i32 @__kmpc_target_init(ptr, i8, i1)
declare i32 @__kmpc_target_init(ptr)
declare i32 @__kmpc_global_thread_num(ptr) #1
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { nounwind }

View File

@ -2,26 +2,27 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
@kernel0_exec_mode = weak constant i8 1
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@G = external global i32
@kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
;.
; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[KERNEL0_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[KERNEL1_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[KERNEL2_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[KERNEL0_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[KERNEL1_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[KERNEL2_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@ -31,24 +32,21 @@ define weak void @kernel0() #0 {
; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
%i = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
@kernel1_exec_mode = weak constant i8 1
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@ -56,24 +54,21 @@ define weak void @kernel1() #0 {
; CHECK-NEXT: ret void
; CHECK: main.thread.user_code:
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
%i = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment)
call void @helper1()
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
@kernel2_exec_mode = weak constant i8 1
define weak void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@ -84,12 +79,12 @@ define weak void @kernel2() #0 {
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
%i = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@ -102,7 +97,7 @@ user_code.entry:
call void @helper1()
call void @helper2()
call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
@ -198,8 +193,8 @@ define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
ret i32 %ret
}
declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) #1
declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) #1
declare i32 @__kmpc_target_init(ptr) #1
declare void @__kmpc_target_deinit() #1
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
declare i32 @__kmpc_global_thread_num(ptr)
@ -214,7 +209,6 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
!2 = !{ptr @kernel0, !"kernel", i32 1}
!3 = !{ptr @kernel1, !"kernel", i32 1}
!4 = !{ptr @kernel2, !"kernel", i32 1}
;
;.
; CHECK: attributes #[[ATTR0]] = { "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
; CHECK: attributes #[[ATTR1]] = { nounwind }

View File

@ -2,28 +2,22 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
@kernel0_exec_mode = weak constant i8 1
@G = external global i32
;.
; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
;.
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 true)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
%i = call i32 @__kmpc_target_init(ptr null)
call void @helper0()
call void @helper1()
call void @helper2()
@ -31,24 +25,20 @@ define weak void @kernel0() #0 {
ret void
}
@kernel1_exec_mode = weak constant i8 1
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null)
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
%i = call i32 @__kmpc_target_init(ptr null)
call void @helper1()
call void @__kmpc_target_deinit(ptr null, i1 false)
call void @__kmpc_target_deinit()
ret void
}
@kernel2_exec_mode = weak constant i8 1
define weak void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
@ -56,14 +46,14 @@ define weak void @kernel2() #0 {
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
call void @__kmpc_target_deinit(ptr null, i1 false)
call void @__kmpc_target_deinit()
ret void
}
@ -112,8 +102,8 @@ define internal void @helper2() {
}
declare i32 @__kmpc_get_hardware_num_threads_in_block()
declare i32 @__kmpc_target_init(ptr, i1 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(ptr nocapture readnone, i1 zeroext) #1
declare i32 @__kmpc_target_init(ptr) #1
declare void @__kmpc_target_deinit() #1
!llvm.module.flags = !{!0, !1}

View File

@ -2,15 +2,17 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@_ZL6Device = internal global double 0.000000e+00, align 8
@__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0
@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
entry:
%0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #0
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #0
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@ -29,13 +31,13 @@ region.guarded:
region.barrier:
tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2)
tail call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #0
tail call void @__kmpc_target_deinit() #0
br label %common.ret
}
declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
declare void @__kmpc_target_deinit() local_unnamed_addr
define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
entry:
@ -78,29 +80,29 @@ attributes #1 = { convergent nounwind }
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR0]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR1]]
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR0]]
; CHECK-NEXT: tail call void @__kmpc_target_deinit() #[[ATTR0]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
;
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0]]
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: ret void

View File

@ -7,17 +7,19 @@ target triple = "nvptx64"
; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@S = external local_unnamed_addr global ptr
@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
define void @foo() {
entry:
%c = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true)
%c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
call void @share(ptr %0), !dbg !10
call void @__kmpc_free_shared(ptr %0)
call void @__kmpc_target_deinit(ptr null, i1 false)
call void @__kmpc_target_deinit()
ret void
}
@ -31,9 +33,9 @@ declare ptr @__kmpc_alloc_shared(i64)
declare void @__kmpc_free_shared(ptr nocapture)
declare i32 @__kmpc_target_init(ptr, i1, i1);
declare i32 @__kmpc_target_init(ptr);
declare void @__kmpc_target_deinit(ptr, i1)
declare void @__kmpc_target_deinit()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}

View File

@ -36,19 +36,20 @@
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_10301_87b2c_foo_l7_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata"
@__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
define weak void @__omp_offloading_10301_87b2c_foo_l7() {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_10301_87b2c_foo_l7_kernel_environment)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@ -56,14 +57,14 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr)
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
worker.exit: ; preds = %entry
ret void
}
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr %0) {
ret i32 0
}
@ -146,7 +147,7 @@ entry:
declare i32 @__kmpc_global_thread_num(ptr)
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
entry:

View File

@ -2,37 +2,36 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@is_spmd_exec_mode = weak constant i8 0
@will_be_spmd_exec_mode = weak constant i8 1
@non_spmd_exec_mode = weak constant i8 1
@will_not_be_spmd_exec_mode = weak constant i8 1
@G = external global i8
@llvm.compiler.used = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode ], section "llvm.metadata"
@is_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
@will_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@will_not_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
;.
; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode], section "llvm.metadata"
; CHECK: @[[IS_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
; CHECK: @[[WILL_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
; CHECK: @[[WILL_NOT_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
;.
define weak void @is_spmd() {
; CHECK-LABEL: define {{[^@]+}}@is_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment)
; CHECK-NEXT: call void @is_spmd_helper1()
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
%i = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment)
call void @is_spmd_helper1()
call void @is_spmd_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(ptr null, i8 2)
call void @__kmpc_target_deinit()
ret void
}
@ -40,7 +39,7 @@ define weak void @will_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@ -49,12 +48,12 @@ define weak void @will_be_spmd() {
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
%i = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@ -65,41 +64,41 @@ user_code.entry:
%0 = call i32 @__kmpc_global_thread_num(ptr null)
call void @is_spmd_helper2()
call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
define weak void @non_spmd() {
; CHECK-LABEL: define {{[^@]+}}@non_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
%i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
define weak void @will_not_be_spmd() {
; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
%i = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
@ -196,8 +195,8 @@ entry:
declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable"
declare i8 @__kmpc_is_spmd_exec_mode()
declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext)
declare void @__kmpc_target_deinit(ptr nocapture readnone, i8)
declare i32 @__kmpc_target_init(ptr)
declare void @__kmpc_target_deinit()
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
declare i32 @__kmpc_global_thread_num(ptr)
declare void @foo()

View File

@ -23,31 +23,31 @@
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
@__omp_offloading_10302_bd7e0_main_l13_exec_mode = weak protected constant i8 3
@__omp_offloading_10302_bd7e0_main_l16_exec_mode = weak protected constant i8 1
@i_shared = internal addrspace(3) global [4 x i8] undef, align 16
@i.i_shared = internal addrspace(3) global [4 x i8] undef, align 16
@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata"
@__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr @1, ptr null }
@__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3
; CHECK: @[[I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16
; CHECK: @[[I_I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata"
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
;.
define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 noundef %i) local_unnamed_addr #0 {
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l13(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@ -55,7 +55,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 nounde
; CHECK: user_code.entry:
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]]
; CHECK: region.guarded.i:
@ -67,12 +67,12 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 nounde
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs.i = alloca [1 x ptr], align 8
%0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #6
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment) #6
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@ -96,11 +96,11 @@ _Z3fooi.internalized.exit: ; preds = %user_code.entry, %r
store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr %captured_vars_addrs.i, align 8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #6
call void @__kmpc_target_deinit() #6
br label %common.ret
}
declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-LABEL: @_Z3fooi(
@ -131,7 +131,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@ -140,29 +140,16 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde
; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR1]]
; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
; CHECK: region.check.tid:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP2]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs.i = alloca [1 x ptr], align 8
%0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #6
%0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment) #6
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@ -177,7 +164,7 @@ user_code.entry: ; preds = %entry
store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr %captured_vars_addrs.i, align 8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #6
call void @__kmpc_target_deinit() #6
br label %common.ret
}

View File

@ -2,66 +2,62 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@no_spmd_exec_mode = weak constant i8 1
@spmd_exec_mode = weak constant i8 0
@parallel_exec_mode = weak constant i8 0
@G = external global i16
@llvm.compiler.used = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
@spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
@parallel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
;.
; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i16
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
; CHECK: @[[NONE_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[PARALLEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
; CHECK: @[[SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
; CHECK: @[[PARALLEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
;.
define weak void @none_spmd() {
; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
; CHECK-NEXT: call void @none_spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
%i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
call void @none_spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
define weak void @spmd() {
; CHECK-LABEL: define {{[^@]+}}@spmd() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
%i = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment)
call void @spmd_helper()
call void @mixed_helper()
call void @__kmpc_target_deinit(ptr null, i8 2)
call void @__kmpc_target_deinit()
ret void
}
define weak void @parallel() {
; CHECK-LABEL: define {{[^@]+}}@parallel() {
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
%i = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment)
call void @spmd_helper()
call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
call void @__kmpc_target_deinit(ptr null, i8 2)
call void @__kmpc_target_deinit()
ret void
}
@ -133,8 +129,8 @@ define internal void @parallel_helper() {
declare void @foo()
declare void @bar()
declare zeroext i16 @__kmpc_parallel_level(ptr, i32)
declare i32 @__kmpc_target_init(ptr, i8 zeroext, i1 zeroext) #1
declare void @__kmpc_target_deinit(ptr nocapture readnone, i8 zeroext) #1
declare i32 @__kmpc_target_init(ptr) #1
declare void @__kmpc_target_deinit() #1
!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2, !3, !4}

View File

@ -5,6 +5,12 @@
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
@S = external local_unnamed_addr global ptr
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
; UTC_ARGS: --disable
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
@ -12,59 +18,55 @@ target triple = "nvptx64"
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
; UTC_ARGS: --enable
@S = external local_unnamed_addr global ptr
%struct.ident_t = type { i32, i32, i32, i32, ptr }
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
; CHECK: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
; CHECK-DISABLED: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK-DISABLED: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
;.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr %0) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-DISABLED-NEXT: ret i32 0
;
ret i32 0
}
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
define void @kernel() {
; CHECK-LABEL: define {{[^@]+}}@kernel() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-NEXT: call void @bar() #[[ATTR0]]
; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]]
; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR0]]
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp()
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit()
; CHECK-DISABLED-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
call void @foo()
call void @bar()
call void @convert_and_move_alloca()
call void @unknown_no_openmp()
call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
call void @__kmpc_target_deinit()
ret void
}

View File

@ -14,28 +14,30 @@ target triple = "nvptx64"
; UTC_ARGS: --enable
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@S = external local_unnamed_addr global ptr
@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@foo_exec_mode = weak constant i8 1
@bar_exec_mode = weak constant i8 1
@baz_spmd_exec_mode = weak constant i8 2
@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
@bar_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
@baz_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr @1, ptr null }
define dso_local void @foo() "kernel" {
entry:
%c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
call void @unknown_no_openmp()
call void @use(ptr %x)
call void @__kmpc_free_shared(ptr %x, i64 4)
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
}
define void @bar() "kernel" {
%c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%c = call i32 @__kmpc_target_init(ptr @bar_kernel_environment)
call void @unknown_no_openmp()
%cmp = icmp eq i32 %c, -1
br i1 %cmp, label %master1, label %exit
@ -54,12 +56,12 @@ master2:
call void @__kmpc_free_shared(ptr %y, i64 4)
br label %exit
exit:
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
}
define void @baz_spmd() "kernel" {
%c = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 true)
%c = call i32 @__kmpc_target_init(ptr @baz_kernel_environment)
call void @unknown_no_openmp()
%c0 = icmp eq i32 %c, -1
br i1 %c0, label %master3, label %exit
@ -69,7 +71,7 @@ master3:
call void @__kmpc_free_shared(ptr %z, i64 24)
br label %exit
exit:
call void @__kmpc_target_deinit(ptr @1, i8 2)
call void @__kmpc_target_deinit()
ret void
}
@ -97,11 +99,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr) {
ret i32 0
}
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
@ -127,32 +129,29 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[FOO_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[BAR_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
; CHECK: @[[FOO_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[BAR_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[BAZ_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[OFFSET:[a-zA-Z0-9_$"\\.-]+]] = global i32 undef
; CHECK: @[[STACK:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [1024 x i8] undef
; CHECK: @[[FOO_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[BAR_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[BAZ_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] poison, align 4
; CHECK: @[[Y_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
;.
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @bar_kernel_environment)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
@ -167,13 +166,13 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR3]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@baz_spmd
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @baz_kernel_environment)
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
@ -183,7 +182,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
;
@ -211,7 +210,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
;
;
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-NEXT: ret i32 0
;
;.

View File

@ -4,10 +4,12 @@
; ModuleID = 'single_threaded_exeuction.c'
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [1 x i8] c"\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@kernel_exec_mode = weak constant i8 1
@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
; CHECK-NOT: [openmp-opt] Basic block @kernel entry is executed by a single thread.
@ -15,7 +17,7 @@
; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
define void @kernel() {
%call = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 false)
%call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
@ -23,7 +25,7 @@ if.then:
if.else:
br label %if.end
if.end:
call void @__kmpc_target_deinit(ptr null, i8 1)
call void @__kmpc_target_deinit()
ret void
}
@ -106,9 +108,9 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare void @__kmpc_kernel_prepare_parallel(ptr)
declare i32 @__kmpc_target_init(ptr, i8, i1)
declare i32 @__kmpc_target_init(ptr)
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
attributes #0 = { cold noinline }

File diff suppressed because it is too large Load Diff

View File

@ -13,19 +13,19 @@
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
@__omp_offloading_fd02_404433c2_main_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
; Function Attrs: alwaysinline convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
@ -33,7 +33,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 der
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@ -56,12 +56,12 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 der
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR3]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
%0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@ -73,11 +73,11 @@ user_code.entry: ; preds = %entry
%call.i = call double @__nv_sin(double 0x400921FB54442D18) #6
store double %call.i, ptr %x, align 8, !tbaa !8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3
call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3
call void @__kmpc_target_deinit() #3
br label %common.ret
}
declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 {
@ -113,7 +113,7 @@ declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr #4
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
declare void @__kmpc_target_deinit() local_unnamed_addr
; Function Attrs: convergent
declare double @__nv_sin(double) local_unnamed_addr #5

View File

@ -2,8 +2,8 @@
;
; Verify we change it to SPMD mode but also avoid propagating the old mode (=generic) into the __kmpc_target_init function.
;
; CHECK: @__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null }
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
; CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false)
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
@ -13,6 +13,8 @@ target triple = "amdgcn-amd-amdhsa"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.DeviceEnvironmentTy = type { i32, i32, i32, i32 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%"struct.(anonymous namespace)::SharedMemorySmartStackTy" = type { [512 x i8], [1024 x i8] }
%"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr }
%"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 }
@ -25,6 +27,7 @@ target triple = "amdgcn-amd-amdhsa"
@__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
@__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
@omptarget_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 4
@__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null }
@IsSPMDMode = weak hidden addrspace(3) global i32 undef, align 4
@.str.12 = private unnamed_addr addrspace(4) constant [47 x i8] c"ValueRAII initialization with wrong old value!\00", align 1
@_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16
@ -41,7 +44,7 @@ define weak_odr amdgpu_kernel void @__omp_offloading_20_11e3950_main_l12(i64 nou
entry:
%ng1 = alloca i32, align 4
%captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5)
%0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_kernel_environment to ptr))
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@ -49,7 +52,7 @@ user_code.entry: ; preds = %entry
%captured_vars_addrs.ascast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
store ptr %ng1, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !7
call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i32 0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.ascast, i64 2)
call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1)
call void @__kmpc_target_deinit()
br label %common.ret
common.ret: ; preds = %user_code.entry, %entry
@ -104,9 +107,12 @@ entry:
}
; Function Attrs: convergent nounwind
define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 {
; define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 {
define internal i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(24) %KernelEnvironment) local_unnamed_addr #9 {
entry:
%0 = and i32 undef, undef
%ExecMode = getelementptr inbounds %struct.ConfigurationEnvironmentTy, ptr %KernelEnvironment, i64 0, i32 2
%Mode = load i8, ptr %ExecMode, align 2, !tbaa !28
%1 = and i8 %Mode, 2
%tobool.not = icmp eq i8 %1, 0
br i1 %tobool.not, label %if.else, label %if.then
@ -248,6 +254,8 @@ _ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit: ; preds = %if.end
if.end10: ; preds = %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit
%sub.i = add nsw i32 %24, -64
%cmp = icmp ult i32 %25, %sub.i
%34 = load i8, ptr %KernelEnvironment, align 8
%UseGenericStateMachine = icmp ne i8 %34, 0
%or.cond251 = select i1 %UseGenericStateMachine, i1 %cmp, i1 false
br i1 %or.cond251, label %do.body.i, label %_ZN14DebugEntryRAIID2Ev.exit250
@ -261,7 +269,7 @@ _ZN14DebugEntryRAIID2Ev.exit250: ; preds = %do.body.i, %if.end1
}
; Function Attrs: nounwind
define internal void @__kmpc_target_deinit(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode) local_unnamed_addr #10 {
define internal void @__kmpc_target_deinit() local_unnamed_addr #10 {
ret void
}

View File

@ -35,29 +35,27 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode = weak constant i8 1
@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
@LocGlob = private unnamed_addr addrspace(5) global i32 43
@__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
; Function Attrs: convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
; CHECK: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43
; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
; CHECK-DISABLED: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43
; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) #0 {
@ -68,7 +66,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-NEXT: [[LOC:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[AL32:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
@ -179,7 +177,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@ -192,7 +190,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-DISABLED-NEXT: [[LOC:%.*]] = alloca ptr, align 8
; CHECK-DISABLED-NEXT: [[AL32:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]]
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #[[ATTR6:[0-9]+]]
; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLED: is_worker_check:
@ -271,7 +269,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 1) #[[ATTR6]]
; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
@ -280,7 +278,7 @@ entry:
%loc = alloca ptr
%al32 = alloca i32
%N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32
%0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
%0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@ -335,7 +333,7 @@ __omp_outlined__.exit: ; preds = %for.cond.i
%call14.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
%call15.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
%call16.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3
call void @__kmpc_target_deinit() #3
ret void
worker.exit: ; preds = %entry
@ -368,13 +366,13 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) {
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-DISABLED-NEXT: ret i32 0
;
ret i32 0
@ -394,7 +392,7 @@ declare void @usei8ptr(ptr) #1
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
declare void @llvm.experimental.noalias.scope.decl(metadata) #4

View File

@ -30,48 +30,42 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1
@__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@G = external global i32, align 4
@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
;.
define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@ -79,58 +73,24 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: is_worker_check:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.is_active.check:
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check:
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.execute:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP0]])
; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check1:
; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.end:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_kernel_end_parallel()
; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.done.barrier:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; CHECK-DISABLE-SPMDIZATION: thread.user_code.check:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @spmd_helper() #5
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
worker.exit: ; preds = %entry
@ -138,26 +98,26 @@ worker.exit: ; preds = %entry
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0
;
ret i32 0
}
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
; Function Attrs: convergent noinline norecurse nounwind
define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@ -168,7 +128,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@ -176,24 +136,24 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @generic_helper() #5
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
worker.exit: ; preds = %entry
@ -217,7 +177,7 @@ define internal void @spmd_helper() #1 {
; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:

View File

@ -31,47 +31,42 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1
@__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@G = external addrspace(5) global i32, align 4
@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(5) global i32, align 4
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
;.
; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(5) global i32, align 4
; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @spmd_helper() #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@ -80,7 +75,7 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: is_worker_check:
@ -118,19 +113,19 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR6:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @spmd_helper() #5
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
worker.exit: ; preds = %entry
@ -138,19 +133,19 @@ worker.exit: ; preds = %entry
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]]) {
; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0
;
ret i32 0
}
declare void @__kmpc_target_deinit(ptr, i8)
declare void @__kmpc_target_deinit()
; Function Attrs: convergent noinline norecurse nounwind
define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
@ -158,7 +153,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK: is_worker_check:
@ -192,7 +187,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @generic_helper() #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@ -201,7 +196,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: is_worker_check:
@ -235,19 +230,19 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
%0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @generic_helper() #5
call void @__kmpc_target_deinit(ptr @1, i8 1)
call void @__kmpc_target_deinit()
ret void
worker.exit: ; preds = %entry

View File

@ -38,6 +38,8 @@ target triple = "nvptx64"
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@ -45,24 +47,25 @@ target triple = "nvptx64"
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
@4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
@6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
@8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1
@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
@10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
@12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1
@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
@G = external global i32
@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
; Function Attrs: convergent norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
%0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
@ -77,11 +80,11 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
call void @unknown() #6, !dbg !27
call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
call void @__kmpc_target_deinit() #3, !dbg !28
br label %common.ret
}
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
define weak i32 @__kmpc_target_init(ptr) {
ret i32 0
}
@ -101,13 +104,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
declare void @__kmpc_target_deinit() local_unnamed_addr
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
%0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
%0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
@ -129,7 +132,7 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
call void @spmd_amenable()
call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
call void @__kmpc_target_deinit() #3, !dbg !46
br label %common.ret
}

View File

@ -4,32 +4,29 @@
target triple = "amdgcn-amd-amdhsa"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@G = internal addrspace(3) global i32 undef, align 4
@H = internal addrspace(3) global i32 undef, align 4
@X = internal addrspace(3) global i32 undef, align 4
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
; Make sure we do not delete the stores to @G without also replacing the load with `1`.
;.
; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CHECK: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
; CHECK: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; CHECK: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
;.
define void @kernel() "kernel" {
;
; CHECK: Function Attrs: norecurse
; CHECK-LABEL: define {{[^@]+}}@kernel
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
@ -47,10 +44,10 @@ define void @kernel() "kernel" {
; CHECK-NEXT: call void @barrier() #[[ATTR6]]
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
; CHECK-NEXT: call void @__kmpc_target_deinit()
; CHECK-NEXT: ret void
;
%call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
%call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
@ -75,7 +72,7 @@ if.then2:
call void @barrier();
br label %if.end
if.end:
call void @__kmpc_target_deinit(ptr undef, i8 1)
call void @__kmpc_target_deinit()
ret void
}
@ -147,8 +144,8 @@ define void @sync_def() {
declare void @sync()
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
declare void @use1(i32) nosync norecurse nounwind nocallback
declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback
declare void @__kmpc_target_deinit(ptr, i8) nocallback
declare i32 @__kmpc_target_init(ptr) nocallback
declare void @__kmpc_target_deinit() nocallback
declare void @llvm.assume(i1)
!llvm.module.flags = !{!0, !1}

View File

@ -1,21 +1,22 @@
; RUN: opt -passes='default<O2>' -pass-remarks-missed=openmp-opt < %s 2>&1 | FileCheck %s --check-prefix=MODULE
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@.str = private unnamed_addr constant [13 x i8] c"Alloc Shared\00", align 1
@S = external local_unnamed_addr global ptr
@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
; MODULE: remark: openmp_opt_module.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
define void @foo() {
entry:
%i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true, i1 true)
%i = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
%x = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
call void @use(ptr %x)
call void @__kmpc_free_shared(ptr %x)
call void @__kmpc_target_deinit(ptr null, i1 false, i1 true)
call void @__kmpc_target_deinit()
ret void
}
@ -31,8 +32,8 @@ entry:
declare ptr @_Z10SafeMallocmPKc(i64 %size, ptr nocapture readnone %msg)
declare void @__kmpc_free_shared(ptr)
declare i32 @__kmpc_target_init(ptr, i1, i1 %use_generic_state_machine, i1)
declare void @__kmpc_target_deinit(ptr, i1, i1)
declare i32 @__kmpc_target_init(ptr)
declare void @__kmpc_target_deinit()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}

View File

@ -126,6 +126,7 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
-nocudalib -nogpulib -nostdinc
-fopenmp -fopenmp-cuda-mode
-Wno-unknown-cuda-version
-DOMPTARGET_DEVICE_RUNTIME
-I${include_directory}
-I${devicertl_base_directory}/../include
${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}

View File

@ -50,8 +50,6 @@ void __assert_fail(const char *assertion, const char *file, unsigned line,
/// Scope guard used for function tracing. It is constructed with the source
/// file, line number, and function name to report; the matching destructor
/// runs when the guarded scope is left.
struct DebugEntryRAII {
/// \param File     Source file name.
/// \param Line     Source line number.
/// \param Function Function name.
DebugEntryRAII(const char *File, const unsigned Line, const char *Function);
~DebugEntryRAII();
static void init();
};
#endif

View File

@ -214,12 +214,14 @@ uint32_t __kmpc_get_warp_size();
/// Kernel
///
///{
// Forward declaration; this header only refers to the type by reference.
struct KernelEnvironmentTy;
int8_t __kmpc_is_spmd_exec_mode();
/// Kernel entry hook. Generated kernels compare the returned value against -1
/// to decide whether the calling thread runs the user code.
int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
bool UseGenericStateMachine);
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment);
/// Kernel exit hook, called at the end of the user-code path of a kernel.
void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode);
void __kmpc_target_deinit();
///}

View File

@ -17,6 +17,9 @@
#include "Types.h"
#include "Utils.h"
// Forward declaration.
struct KernelEnvironmentTy;
#pragma omp begin declare target device_type(nohost)
namespace ompx {
@ -113,7 +116,10 @@ extern ThreadStateTy **ThreadStates;
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
/// Initialize the state machinery. Must be called by all threads.
/// \p KernelEnvironment is the environment of the kernel being started.
void init(bool IsSPMD);
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
/// Return the kernel environment associated with the current kernel.
KernelEnvironmentTy &getKernelEnvironment();
/// TODO
enum ValueKind {

View File

@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Configuration.h"
#include "DeviceEnvironment.h"
#include "Environment.h"
#include "State.h"
#include "Types.h"
@ -53,7 +53,9 @@ bool config::isDebugMode(config::DebugKind Kind) {
bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; }
/// Report whether nested parallelism may occur. The module-wide
/// __omp_rtl_assume_no_nested_parallelism flag rules it out for every kernel;
/// the per-kernel answer is the MayUseNestedParallelism field of the current
/// kernel environment's configuration.
bool config::mayUseNestedParallelism() {
return !__omp_rtl_assume_no_nested_parallelism;
if (__omp_rtl_assume_no_nested_parallelism)
return false;
return state::getKernelEnvironment().Configuration.MayUseNestedParallelism;
}
#pragma omp end declare target

View File

@ -12,8 +12,10 @@
#include "Debug.h"
#include "Configuration.h"
#include "Environment.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Types.h"
using namespace ompx;
@ -31,15 +33,14 @@ void __assert_fail(const char *assertion, const char *file, unsigned line,
}
}
/// Current indentation level for the function trace. Only accessed by thread 0.
__attribute__((loader_uninitialized)) static uint32_t Level;
#pragma omp allocate(Level) allocator(omp_pteam_mem_alloc)
DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
const char *Function) {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
uint16_t &Level =
state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;
for (int I = 0; I < Level; ++I)
PRINTF("%s", " ");
@ -51,10 +52,11 @@ DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
/// Leave a traced scope: when function tracing is enabled, thread 0 of block 0
/// decrements the trace indentation level. All other threads do nothing.
DebugEntryRAII::~DebugEntryRAII() {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0)
mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
// The indentation level is kept in the kernel's dynamic environment.
uint16_t &Level =
state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;
Level--;
}
}
void DebugEntryRAII::init() { Level = 0; }
#pragma omp end declare target

View File

@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "Debug.h"
#include "Environment.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
@ -23,11 +24,12 @@ using namespace ompx;
#pragma omp begin declare target device_type(nohost)
/// Initialize the synchronization, mapping, and state components of the
/// device runtime, in that order. \p IsSPMD is passed to each component;
/// \p KernelEnvironment is forwarded to state::init.
static void inititializeRuntime(bool IsSPMD) {
static void inititializeRuntime(bool IsSPMD,
KernelEnvironmentTy &KernelEnvironment) {
// Order is important here.
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
state::init(IsSPMD);
state::init(IsSPMD, KernelEnvironment);
}
/// Simple generic state machine for worker threads.
@ -67,16 +69,17 @@ extern "C" {
///
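/// \param KernelEnvironment Kernel environment emitted by the compiler. It
///        provides the configuration (execution mode, generic state machine
///        use) and the source location identification, which can be NULL.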
///
int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
bool UseGenericStateMachine) {
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
FunctionTracingRAII();
const bool IsSPMD =
Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
bool IsSPMD = Configuration.ExecMode &
llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
if (IsSPMD) {
inititializeRuntime(/* IsSPMD */ true);
inititializeRuntime(/* IsSPMD */ true, KernelEnvironment);
synchronize::threadsAligned(atomic::relaxed);
} else {
inititializeRuntime(/* IsSPMD */ false);
inititializeRuntime(/* IsSPMD */ false, KernelEnvironment);
// No need to wait since only the main threads will execute user
// code and workers will run into a barrier right away.
}
@ -108,7 +111,7 @@ int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
// thread's warp, so none of its threads can ever be active worker threads.
if (UseGenericStateMachine &&
mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) {
genericStateMachine(Ident);
genericStateMachine(KernelEnvironment.Ident);
} else {
// Retrieve the work function just to ensure we always call
// __kmpc_kernel_parallel even if a custom state machine is used.
@ -132,11 +135,10 @@ int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
///
/// Takes no parameters; the execution mode is queried via
/// mapping::isSPMDMode().
///
void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) {
void __kmpc_target_deinit() {
FunctionTracingRAII();
const bool IsSPMD =
Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
bool IsSPMD = mapping::isSPMDMode();
state::assumeInitialState(IsSPMD);
if (IsSPMD)
return;

View File

@ -9,8 +9,8 @@
//===----------------------------------------------------------------------===//
#include "State.h"
#include "Configuration.h"
#include "Debug.h"
#include "Environment.h"
#include "Interface.h"
#include "Mapping.h"
#include "Synchronization.h"
@ -34,6 +34,9 @@ constexpr const uint32_t Alignment = 16;
extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
/// The kernel environment passed to the init method by the compiler.
static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr);
namespace {
/// Fallback implementations are missing to trigger a link time error.
@ -241,15 +244,19 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
} // namespace
/// Initialize the runtime state for a kernel.
///
/// The shared memory stack is initialized unconditionally. The team state, the
/// debug entry state, the ThreadStates pointer and the kernel environment
/// pointer are only set when mapping::isInitialThreadInLevel0(IsSPMD) is true.
///
/// \param IsSPMD Forwarded to the stack, the team state and the thread check.
/// \param KernelEnvironment Its address is stored in KernelEnvironmentPtr.
// NOTE(review): two signature lines appear back to back here (with and without
// KernelEnvironment); this looks like a before/after rendering — confirm.
void state::init(bool IsSPMD) {
void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) {
SharedMemorySmartStack.init(IsSPMD);
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
TeamState.init(IsSPMD);
DebugEntryRAII::init();
ThreadStates = nullptr;
KernelEnvironmentPtr = &KernelEnvironment;
}
}
/// Return a reference to the kernel environment whose address is held in
/// KernelEnvironmentPtr. The pointer is dereferenced without a null check.
KernelEnvironmentTy &state::getKernelEnvironment() {
  KernelEnvironmentTy *Environment = KernelEnvironmentPtr;
  return *Environment;
}
void state::enterDataEnvironment(IdentTy *Ident) {
ASSERT(config::mayUseThreadStates() &&
"Thread state modified while explicitly disabled!");

View File

@ -1,25 +0,0 @@
//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Global device environment
//
//===----------------------------------------------------------------------===//
#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_
#define _OMPTARGET_DEVICE_ENVIRONMENT_H_
// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions
/// Global device environment record made of four 32-bit unsigned fields.
// NOTE(review): the per-field meaning (debug kind, number of devices, device
// number, dynamic memory size) is inferred from the member names only —
// confirm against the code that fills this record.
struct DeviceEnvironmentTy {
uint32_t DebugKind;
uint32_t NumDevices;
uint32_t DeviceNum;
uint32_t DynamicMemSize;
};
#endif

View File

@ -0,0 +1,61 @@
//===------------ Environment.h - OpenMP GPU environments --------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Environments shared between host and device.
//
//===----------------------------------------------------------------------===//
#ifndef _OMPTARGET_ENVIRONMENT_H_
#define _OMPTARGET_ENVIRONMENT_H_
#ifdef OMPTARGET_DEVICE_RUNTIME
#include "Types.h"
#else
#include "SourceInfo.h"
#include <cstdint>
using IdentTy = ident_t;
#endif
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
/// Device-wide environment record made of four 32-bit unsigned fields.
// NOTE(review): the per-field meaning (debug kind, number of devices, device
// number, dynamic memory size) is inferred from the member names only —
// confirm against the code that fills this record.
struct DeviceEnvironmentTy {
uint32_t DebugKind;
uint32_t NumDevices;
uint32_t DeviceNum;
uint32_t DynamicMemSize;
};
// NOTE: Please don't change the order of these members as their indices are
// used in the middle end. Always add new data members at the end.
// Unlike KernelEnvironmentTy below, this structure contains members that
// might be modified at runtime.
/// Mutable, per-kernel data; KernelEnvironmentTy refers to it through its
/// DynamicEnv pointer.
struct DynamicEnvironmentTy {
/// Current indentation level for the function trace. Only accessed by thread
/// 0.
uint16_t DebugIndentionLevel;
};
// NOTE: Please don't change the order of these members as their indices are
// used in the middle end. Always add new data members at the end.
/// Per-kernel configuration flags, stored as the first member of
/// KernelEnvironmentTy.
struct ConfigurationEnvironmentTy {
/// Non-zero if the kernel should run the generic state machine; read by
/// __kmpc_target_init to decide whether to call genericStateMachine.
uint8_t UseGenericStateMachine;
// NOTE(review): presumably non-zero when the kernel may contain nested
// parallel regions; no reader is visible here — confirm against its users.
uint8_t MayUseNestedParallelism;
/// Execution mode of the kernel. __kmpc_target_init tests it against
/// OMP_TGT_EXEC_MODE_SPMD to derive IsSPMD.
llvm::omp::OMPTgtExecModeFlags ExecMode;
};
// NOTE: Please don't change the order of these members as their indices are
// used in the middle end. Always add new data members at the end.
/// Per-kernel environment handed to __kmpc_target_init. The host plugin reads
/// it from the device image as the global "<kernel name>_kernel_environment".
struct KernelEnvironmentTy {
/// Configuration flags of the kernel (state machine use, execution mode, ...).
ConfigurationEnvironmentTy Configuration;
/// Source location information; passed to genericStateMachine by
/// __kmpc_target_init.
IdentTy *Ident;
/// Pointer to the part of the environment that may be modified at runtime.
DynamicEnvironmentTy *DynamicEnv;
};
#endif // _OMPTARGET_ENVIRONMENT_H_

View File

@ -21,7 +21,7 @@
#include <unordered_map>
#include "Debug.h"
#include "DeviceEnvironment.h"
#include "Environment.h"
#include "GlobalHandler.h"
#include "PluginInterface.h"
#include "Utilities.h"

View File

@ -582,32 +582,45 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
return Plugin::success();
}
/// Read the kernel environment object of kernel \p Name from \p Image.
///
/// The object is looked up as the global "<Name>_kernel_environment".
///
/// \param Name  Name of the kernel; does not need to be null-terminated.
/// \param Image Device image the global is read from.
/// \returns the environment value on success, otherwise a string error that
/// carries the message of the failed global lookup. The error is propagated,
/// not swallowed; the caller decides whether a missing environment is fatal.
Expected<KernelEnvironmentTy>
GenericDeviceTy::getKernelEnvironmentForKernel(StringRef Name,
                                               DeviceImageTy &Image) {
  // StringRef::data() is not guaranteed to be null-terminated, but both the
  // global metadata object and the "%s" debug print below consume C strings.
  // Work on an owned, terminated copy of the kernel name instead.
  const std::string KernelName = Name.str();

  // Create a metadata object for the kernel environment object.
  StaticGlobalTy<KernelEnvironmentTy> KernelEnv(KernelName.c_str(),
                                                "_kernel_environment");

  // Retrieve kernel environment object for the kernel.
  GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
  if (auto Err = GHandler.readGlobalFromImage(*this, Image, KernelEnv)) {
    // Turn the error into a message (this marks the original error as
    // handled) and hand the message back to the caller as a new error.
    std::string ErrStr = toString(std::move(Err));
    DP("Failed to read kernel environment object for '%s': %s\n",
       KernelName.c_str(), ErrStr.data());
    return createStringError(inconvertibleErrorCode(), ErrStr);
  }

  return KernelEnv.getValue();
}
/// Determine the execution mode of kernel \p Name in \p Image.
///
/// Falls back to OMP_TGT_EXEC_MODE_SPMD when the lookup fails, and returns a
/// plugin error when GenericKernelTy::isValidExecutionMode rejects the mode
/// that was read.
// NOTE(review): this span shows two implementations interleaved — one reading
// the "<Name>_exec_mode" global, one reading Configuration.ExecMode from the
// kernel environment. It looks like a before/after rendering — confirm which
// lines are current before editing the code.
Expected<OMPTgtExecModeFlags>
GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
DeviceImageTy &Image) {
// Create a metadata object for the exec mode global (auto-generated).
StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(Name.data(),
"_exec_mode");
// Retrieve execution mode for the kernel. This may fail since some kernels
// may not have an execution mode.
GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
// Consume the error since it is acceptable to fail.
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
DP("Failed to read execution mode for '%s': %s\n"
"Using default SPMD (2) execution mode\n",
Name.data(), ErrStr.data());
auto KernelEnvOrError = getKernelEnvironmentForKernel(Name, Image);
if (!KernelEnvOrError) {
(void)KernelEnvOrError.takeError();
return OMP_TGT_EXEC_MODE_SPMD;
}
// Check that the retrieved execution mode is valid.
if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
return Plugin::error("Invalid execution mode %d for '%s'",
ExecModeGlobal.getValue(), Name.data());
auto &KernelEnv = *KernelEnvOrError;
auto ExecMode = KernelEnv.Configuration.ExecMode;
return ExecModeGlobal.getValue();
// Check that the retrieved execution mode is valid.
if (!GenericKernelTy::isValidExecutionMode(ExecMode))
return Plugin::error("Invalid execution mode %d for '%s'", ExecMode,
Name.data());
return ExecMode;
}
Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,

View File

@ -19,7 +19,7 @@
#include <vector>
#include "Debug.h"
#include "DeviceEnvironment.h"
#include "Environment.h"
#include "GlobalHandler.h"
#include "JIT.h"
#include "MemoryManager.h"
@ -748,6 +748,11 @@ protected:
/// Map of host pinned allocations used for optimize device transfers.
PinnedAllocationMapTy PinnedAllocs;
private:
/// Return the kernel environment object for kernel \p Name.
Expected<KernelEnvironmentTy>
getKernelEnvironmentForKernel(StringRef Name, DeviceImageTy &Image);
};
/// Class implementing common functionalities of offload plugins. Each plugin

View File

@ -17,7 +17,7 @@
#include <unordered_map>
#include "Debug.h"
#include "DeviceEnvironment.h"
#include "Environment.h"
#include "GlobalHandler.h"
#include "PluginInterface.h"

View File

@ -17,7 +17,7 @@
#include <unordered_map>
#include "Debug.h"
#include "DeviceEnvironment.h"
#include "Environment.h"
#include "GlobalHandler.h"
#include "PluginInterface.h"
#include "omptarget.h"

View File

@ -37,7 +37,7 @@
#include "internal.h"
#include "rt.h"
#include "DeviceEnvironment.h"
#include "Environment.h"
#include "get_elf_mach_gfx_name.h"
#include "omptargetplugin.h"
#include "print_tracing.h"

View File

@ -23,7 +23,7 @@
#include <vector>
#include "Debug.h"
#include "DeviceEnvironment.h"
#include "Environment.h"
#include "omptarget.h"
#include "omptargetplugin.h"