[libc] Add basic wrappers for the backend address spaces

The GPU makes use of different address spaces. We generally work with
global memory, thread private memory, and thread shared memory. This
patch simply adds a few preliminary wrappers to map these concepts to
the numerical values the backend uses. Casts between these address spaces
are not validated by the wrappers, so the user must check that they are valid.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D156731
This commit is contained in:
Joseph Huber 2023-07-31 14:32:44 -05:00
parent b1193c13a5
commit 3926feb84e
3 changed files with 17 additions and 0 deletions

View File

@ -20,6 +20,12 @@ namespace gpu {
/// Number of threads that execute in lock-step within one wavefront, as given
/// by the __AMDGCN_WAVEFRONT_SIZE macro.
constexpr uint64_t LANE_SIZE = __AMDGCN_WAVEFRONT_SIZE;
/// Type aliases to the address spaces used by the AMDGPU backend.
///
/// Each alias names the type T annotated with the matching
/// `clang::opencl_*` address-space attribute. Conversions between types that
/// carry different address-space attributes are not validated by these
/// aliases; the caller is responsible for checking them.

/// T annotated with `clang::opencl_private`.
template <typename T> using Private = [[clang::opencl_private]] T;
/// T annotated with `clang::opencl_constant`.
template <typename T> using Constant = [[clang::opencl_constant]] T;
/// T annotated with `clang::opencl_local`.
template <typename T> using Local = [[clang::opencl_local]] T;
/// T annotated with `clang::opencl_global`.
template <typename T> using Global = [[clang::opencl_global]] T;
/// Returns the number of workgroups in the 'x' dimension of the grid.
LIBC_INLINE uint32_t get_num_blocks_x() {
return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();

View File

@ -18,6 +18,11 @@ namespace gpu {
/// Number of threads that execute in lock-step; fixed at one in this
/// implementation.
constexpr uint64_t LANE_SIZE = 1;
/// Address-space type aliases. In this implementation every alias is simply T
/// itself: no address-space attribute is applied, so all of them name the same
/// type.
template <typename T> using Private = T;
template <typename T> using Constant = T;
/// `Local` matches the alias name declared by the AMDGPU and NVPTX variants, so
/// code written against `Local<T>` compiles against this implementation too.
template <typename T> using Local = T;
/// `Shared` is kept so existing users of this name continue to compile.
template <typename T> using Shared = T;
template <typename T> using Global = T;
/// Returns the number of blocks in the 'x' dimension; this implementation
/// always reports a single block.
LIBC_INLINE uint32_t get_num_blocks_x() {
  return 1;
}
/// Returns the number of blocks in the 'y' dimension; this implementation
/// always reports a single block.
LIBC_INLINE uint32_t get_num_blocks_y() {
  return 1;
}

View File

@ -19,6 +19,12 @@ namespace gpu {
/// Number of threads that execute in lock-step within one warp.
constexpr uint64_t LANE_SIZE = 32;
/// Type aliases to the address spaces used by the NVPTX backend.
///
/// Each alias names the type T annotated with the matching
/// `clang::opencl_*` address-space attribute. Conversions between types that
/// carry different address-space attributes are not validated by these
/// aliases; the caller is responsible for checking them.

/// T annotated with `clang::opencl_private`.
template <typename T> using Private = [[clang::opencl_private]] T;
/// T annotated with `clang::opencl_constant`.
template <typename T> using Constant = [[clang::opencl_constant]] T;
/// T annotated with `clang::opencl_local`.
template <typename T> using Local = [[clang::opencl_local]] T;
/// T annotated with `clang::opencl_global`.
template <typename T> using Global = [[clang::opencl_global]] T;
/// Returns the number of CUDA blocks in the 'x' dimension.
LIBC_INLINE uint32_t get_num_blocks_x() {
return __nvvm_read_ptx_sreg_nctaid_x();