[CUDA] Added conversion functions to builtin vars.

This is needed to compile some headers in CUDA-11 that assume that threadIdx is
implicitly convertible to dim3. With NVCC, threadIdx is uint3 and there's
dim3(uint3) constructor. Clang uses a special type for the builtin variables, so
that path does not work. Instead, this patch adds conversion function to the
builtin variable classes. that will allow them to be converted to dim3 and uint3.

Differential Revision: https://reviews.llvm.org/D88250
This commit is contained in:
Artem Belevich 2020-09-24 10:47:45 -07:00
parent 2a96f47c5f
commit 30514f0afa
2 changed files with 27 additions and 10 deletions

View File

@ -55,7 +55,9 @@ struct __cuda_builtin_threadIdx_t {
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
// threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
__attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
};
@ -66,7 +68,9 @@ struct __cuda_builtin_blockIdx_t {
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
// blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
// uint3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
__attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
};
@ -78,6 +82,8 @@ struct __cuda_builtin_blockDim_t {
// blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
__attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
};
@ -89,6 +95,8 @@ struct __cuda_builtin_gridDim_t {
// gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
// dim3). This function is defined after we pull in vector_types.h.
__attribute__((device)) operator dim3() const;
__attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
};
@ -108,5 +116,6 @@ __attribute__((device)) const int warpSize = 32;
#undef __CUDA_DEVICE_BUILTIN
#undef __CUDA_BUILTIN_VAR
#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
#undef __DELETE
#endif /* __CUDA_BUILTIN_VARS_H */

View File

@ -377,30 +377,38 @@ __device__ static inline void *malloc(size_t __size) {
// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to
// come after we've pulled in the definition of uint3 and dim3.
__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const {
return dim3(x, y, z);
}
__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
uint3 ret;
ret.x = x;
ret.y = y;
ret.z = z;
return ret;
return {x, y, z};
}
__device__ inline __cuda_builtin_blockIdx_t::operator dim3() const {
return dim3(x, y, z);
}
__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
uint3 ret;
ret.x = x;
ret.y = y;
ret.z = z;
return ret;
return {x, y, z};
}
__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
return dim3(x, y, z);
}
__device__ inline __cuda_builtin_blockDim_t::operator uint3() const {
return {x, y, z};
}
__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
return dim3(x, y, z);
}
__device__ inline __cuda_builtin_gridDim_t::operator uint3() const {
return {x, y, z};
}
#include <__clang_cuda_cmath.h>
#include <__clang_cuda_intrinsics.h>
#include <__clang_cuda_complex_builtins.h>