commit 5d02ca49a2

[libomptarget][nvptx] Undef, weak shared variables

Shared variables on nvptx, and LDS on amdgcn, are uninitialized at the start of kernel execution. Therefore create the variables with undef instead of zeros, motivated in part by the amdgcn back end rejecting LDS+initializer. Common is zero-initialized, which seems incompatible with shared. Thus change them to weak, following the direction of https://reviews.llvm.org/rG7b3eabdcd215

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D90248
120 lines · 6.5 KiB · C++
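A minimal before/after sketch of the linkage and initializer change the commit describes (the @team_shared name is illustrative; the 128-byte shape matches the buffer the test below checks):

    ; before: common linkage forces zero-initialization, which does not match
    ; uninitialized shared/LDS memory and is rejected with an initializer on amdgcn
    @team_shared = common addrspace(3) global [128 x i8] zeroinitializer

    ; after: weak linkage, undef initializer
    @team_shared = weak addrspace(3) global [128 x i8] undef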
// Test device global memory data sharing codegen.
///==========================================================================///
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CK1 --check-prefix SEQ
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix CK1 --check-prefix PAR

// expected-no-diagnostics

#ifndef HEADER
#define HEADER

void test_ds(){
  #pragma omp target
  {
    int a = 10;
    #pragma omp parallel
    {
      a = 1000;
    }
    int b = 100;
    int c = 1000;
    #pragma omp parallel private(c)
    {
      int *c1 = &c;
      b = a + 10000;
    }
  }
}
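// Note: both `a` and `b` escape into the parallel regions above, so device
// codegen is expected to globalize them into one %struct._globalized_locals_ty
// frame (a at field 0, b at field 1, 8 bytes total). The private copy of `c`
// never needs globalization; the outlined-function checks further down verify
// it stays in a plain alloca.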
// SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i64 8
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
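// The two modes reserve that frame differently: SEQ carves it out of the
// statically reserved 128-byte team buffer in shared memory
// ([[SHARED_GLOBAL_RD]] above), while PAR (-fopenmp-cuda-parallel-target-regions)
// pushes an 8-byte frame onto the runtime's data-sharing stack in the kernel
// prologue instead.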

/// ========= In the worker function ========= ///

// CK1: {{.*}}define internal void @__omp_offloading{{.*}}test_ds{{.*}}_worker()
// CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CK1-NOT: call void @__kmpc_data_sharing_init_stack
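// Workers park in __kmpc_barrier_simple_spmd waiting for parallel work; only
// the master thread sets up the data-sharing stack (see the init_stack call in
// the kernel function below), so the worker must not call
// __kmpc_data_sharing_init_stack itself.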
/// ========= In the kernel function ========= ///
// CK1: {{.*}}define weak void @__omp_offloading{{.*}}test_ds{{.*}}()
// CK1: [[SHAREDARGS1:%.+]] = alloca i8**
// CK1: [[SHAREDARGS2:%.+]] = alloca i8**
// CK1: call void @__kmpc_kernel_init
// CK1: call void @__kmpc_data_sharing_init_stack
// SEQ: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]],
// SEQ: [[SIZE:%.+]] = load i64, i64* [[KERNEL_SIZE]],
// SEQ: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i64 [[SIZE]], i16 [[SHARED_MEM_FLAG]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
// SEQ: [[KERNEL_RD:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
// SEQ: [[GLOBALSTACK:%.+]] = getelementptr inbounds i8, i8* [[KERNEL_RD]], i64 0
// PAR: [[GLOBALSTACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{32|64}} 8, i16 1)
// CK1: [[GLOBALSTACK2:%.+]] = bitcast i8* [[GLOBALSTACK]] to %struct._globalized_locals_ty*
// CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0
// CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1
// CK1: store i32 10, i32* [[A]]
// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}})
// CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1)
// CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]]
// CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]], i64 0
// CK1: [[SHAREDVAR:%.+]] = bitcast i32* [[A]] to i8*
// CK1: store i8* [[SHAREDVAR]], i8** [[SHARGSTMP2]]
// CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CK1: call void @__kmpc_end_sharing_variables()
// CK1: store i32 100, i32* [[B]]
// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}})
// CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS2]], i64 2)
// CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]]
// CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]], i64 0
// CK1: [[SHAREDVAR1:%.+]] = bitcast i32* [[B]] to i8*
// CK1: store i8* [[SHAREDVAR1]], i8** [[SHARGSTMP4]]
// CK1: [[SHARGSTMP12:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]], i64 1
// CK1: [[SHAREDVAR2:%.+]] = bitcast i32* [[A]] to i8*
// CK1: store i8* [[SHAREDVAR2]], i8** [[SHARGSTMP12]]
// CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CK1: call void @__kmpc_end_sharing_variables()
// SEQ: [[SHARED_MEM_FLAG:%.+]] = load i16, i16* [[KERNEL_SHARED]],
// SEQ: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[SHARED_MEM_FLAG]])
// PAR: call void @__kmpc_data_sharing_pop_stack(i8* [[GLOBALSTACK]])
// CK1: call void @__kmpc_kernel_deinit(i16 1)
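// Both parallel regions above follow the same handoff protocol: the master
// prepares the region, publishes the argument array via
// __kmpc_begin_sharing_variables, releases the waiting workers with one simple
// SPMD barrier, joins them with a second, then tears the sharing down with
// __kmpc_end_sharing_variables.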

/// ========= In the data sharing wrapper function ========= ///

// CK1: {{.*}}define internal void @__omp_outlined{{.*}}wrapper({{.*}})
// CK1: [[SHAREDARGS4:%.+]] = alloca i8**
// CK1: call void @__kmpc_get_shared_variables(i8*** [[SHAREDARGS4]])
// CK1: [[SHARGSTMP13:%.+]] = load i8**, i8*** [[SHAREDARGS4]]
// CK1: [[SHARGSTMP14:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP13]], i64 0
// CK1: [[SHARGSTMP15:%.+]] = bitcast i8** [[SHARGSTMP14]] to i32**
// CK1: [[SHARGSTMP16:%.+]] = load i32*, i32** [[SHARGSTMP15]]
// CK1: call void @__omp_outlined__{{.*}}({{.*}}, i32* [[SHARGSTMP16]])
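// The wrapper recovers the shared-argument array with
// __kmpc_get_shared_variables and forwards each element, cast back to i32*,
// to the outlined parallel body.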

/// outlined function for the second parallel region ///

// CK1: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* nonnull align {{[0-9]+}} dereferenceable{{.+}}, i32* nonnull align {{[0-9]+}} dereferenceable{{.+}})
// CK1-NOT: call i8* @__kmpc_data_sharing_push_stack(
// CK1: [[C_ADDR:%.+]] = alloca i32,
// CK1: store i32* [[C_ADDR]], i32** %
// CK1-NOT: call void @__kmpc_data_sharing_pop_stack(
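// `c` is private to the second parallel region, so the outlined function keeps
// it in a local alloca and, as the CK1-NOT lines check, never touches the
// data-sharing stack for it.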

/// ========= In the data sharing wrapper function ========= ///

// CK1: {{.*}}define internal void @__omp_outlined{{.*}}wrapper({{.*}})
// CK1: [[SHAREDARGS3:%.+]] = alloca i8**
// CK1: call void @__kmpc_get_shared_variables(i8*** [[SHAREDARGS3]])
// CK1: [[SHARGSTMP5:%.+]] = load i8**, i8*** [[SHAREDARGS3]]
// CK1: [[SHARGSTMP6:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP5]], i64 0
// CK1: [[SHARGSTMP7:%.+]] = bitcast i8** [[SHARGSTMP6]] to i32**
// CK1: [[SHARGSTMP8:%.+]] = load i32*, i32** [[SHARGSTMP7]]
// CK1: [[SHARGSTMP9:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP5]], i64 1
// CK1: [[SHARGSTMP10:%.+]] = bitcast i8** [[SHARGSTMP9]] to i32**
// CK1: [[SHARGSTMP11:%.+]] = load i32*, i32** [[SHARGSTMP10]]
// CK1: call void @__omp_outlined__{{.*}}({{.*}}, i32* [[SHARGSTMP8]], i32* [[SHARGSTMP11]])
#endif