[OpenMP][NVPTX] Replaced CUDA builtin vars with LLVM intrinsics

Replaced CUDA builtin vars with LLVM intrinsics so that we no longer need
definitions of those builtin vars.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95013
This commit is contained in:
Shilei Tian 2021-01-20 12:01:51 -05:00
parent c540ce9900
commit fd70f70d1e

View File

@ -115,10 +115,12 @@ DEVICE void __kmpc_impl_threadfence_block() { __threadfence_block(); }
// Thin wrapper that forwards to the CUDA __threadfence_system() fence.
DEVICE void __kmpc_impl_threadfence_system() {
  __threadfence_system();
}
// Calls to the NVPTX layer (assuming 1D layout)
// Index of the calling thread within its block; only the x component of
// threadIdx is consulted.
DEVICE int GetThreadIdInBlock() {
  const int ThreadIdX = threadIdx.x;
  return ThreadIdX;
}
// Index of the calling thread's block within the kernel launch; only the x
// component of blockIdx is consulted.
DEVICE int GetBlockIdInKernel() {
  const int BlockIdX = blockIdx.x;
  return BlockIdX;
}
// Number of blocks in the kernel launch; only the x component of gridDim is
// consulted.
DEVICE int GetNumberOfBlocksInKernel() {
  const int NumBlocksX = gridDim.x;
  return NumBlocksX;
}
// Number of threads in the calling thread's block; only the x component of
// blockDim is consulted.
DEVICE int GetNumberOfThreadsInBlock() {
  const int NumThreadsX = blockDim.x;
  return NumThreadsX;
}
// Index of the calling thread within its block, obtained through the
// __nvvm_read_ptx_sreg_tid_x() builtin (x component only).
DEVICE int GetThreadIdInBlock() {
  const int ThreadIdX = __nvvm_read_ptx_sreg_tid_x();
  return ThreadIdX;
}
// Index of the calling thread's block within the kernel launch, obtained
// through the __nvvm_read_ptx_sreg_ctaid_x() builtin (x component only).
DEVICE int GetBlockIdInKernel() {
  const int BlockIdX = __nvvm_read_ptx_sreg_ctaid_x();
  return BlockIdX;
}
// Number of blocks in the kernel launch, obtained through the
// __nvvm_read_ptx_sreg_nctaid_x() builtin (x component only).
DEVICE int GetNumberOfBlocksInKernel() {
  const int NumBlocksX = __nvvm_read_ptx_sreg_nctaid_x();
  return NumBlocksX;
}
// Number of threads in the calling thread's block, obtained through the
// __nvvm_read_ptx_sreg_ntid_x() builtin (x component only).
DEVICE int GetNumberOfThreadsInBlock() {
  const int NumThreadsX = __nvvm_read_ptx_sreg_ntid_x();
  return NumThreadsX;
}
// Warp index of the calling thread inside its block: the in-block thread id
// divided by WARPSIZE.
DEVICE unsigned GetWarpId() {
  const int ThreadId = GetThreadIdInBlock();
  return ThreadId / WARPSIZE;
}
// Lane of the calling thread inside its warp: the in-block thread id masked
// with WARPSIZE - 1 (equivalent to id % WARPSIZE when WARPSIZE is a power of
// two -- presumably the case here; confirm against the WARPSIZE definition).
DEVICE unsigned GetLaneId() {
  const int ThreadId = GetThreadIdInBlock();
  return ThreadId & (WARPSIZE - 1);
}