mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 06:10:12 +00:00
[CUDA] Added conversion functions to builtin vars.
This is needed to compile some headers in CUDA-11 that assume that threadIdx is implicitly convertible to dim3. With NVCC, threadIdx is uint3 and there's dim3(uint3) constructor. Clang uses a special type for the builtin variables, so that path does not work. Instead, this patch adds conversion function to the builtin variable classes. that will allow them to be converted to dim3 and uint3. Differential Revision: https://reviews.llvm.org/D88250
This commit is contained in:
parent
2a96f47c5f
commit
30514f0afa
@ -55,7 +55,9 @@ struct __cuda_builtin_threadIdx_t {
|
||||
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
|
||||
// threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
|
||||
// uint3). This function is defined after we pull in vector_types.h.
|
||||
__attribute__((device)) operator dim3() const;
|
||||
__attribute__((device)) operator uint3() const;
|
||||
|
||||
private:
|
||||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
|
||||
};
|
||||
@ -66,7 +68,9 @@ struct __cuda_builtin_blockIdx_t {
|
||||
__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
|
||||
// blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
|
||||
// uint3). This function is defined after we pull in vector_types.h.
|
||||
__attribute__((device)) operator dim3() const;
|
||||
__attribute__((device)) operator uint3() const;
|
||||
|
||||
private:
|
||||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
|
||||
};
|
||||
@ -78,6 +82,8 @@ struct __cuda_builtin_blockDim_t {
|
||||
// blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
|
||||
// dim3). This function is defined after we pull in vector_types.h.
|
||||
__attribute__((device)) operator dim3() const;
|
||||
__attribute__((device)) operator uint3() const;
|
||||
|
||||
private:
|
||||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
|
||||
};
|
||||
@ -89,6 +95,8 @@ struct __cuda_builtin_gridDim_t {
|
||||
// gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
|
||||
// dim3). This function is defined after we pull in vector_types.h.
|
||||
__attribute__((device)) operator dim3() const;
|
||||
__attribute__((device)) operator uint3() const;
|
||||
|
||||
private:
|
||||
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
|
||||
};
|
||||
@ -108,5 +116,6 @@ __attribute__((device)) const int warpSize = 32;
|
||||
#undef __CUDA_DEVICE_BUILTIN
|
||||
#undef __CUDA_BUILTIN_VAR
|
||||
#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
|
||||
#undef __DELETE
|
||||
|
||||
#endif /* __CUDA_BUILTIN_VARS_H */
|
||||
|
@ -377,30 +377,38 @@ __device__ static inline void *malloc(size_t __size) {
|
||||
// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to
|
||||
// come after we've pulled in the definition of uint3 and dim3.
|
||||
|
||||
__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const {
|
||||
return dim3(x, y, z);
|
||||
}
|
||||
|
||||
__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
|
||||
uint3 ret;
|
||||
ret.x = x;
|
||||
ret.y = y;
|
||||
ret.z = z;
|
||||
return ret;
|
||||
return {x, y, z};
|
||||
}
|
||||
|
||||
__device__ inline __cuda_builtin_blockIdx_t::operator dim3() const {
|
||||
return dim3(x, y, z);
|
||||
}
|
||||
|
||||
__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
|
||||
uint3 ret;
|
||||
ret.x = x;
|
||||
ret.y = y;
|
||||
ret.z = z;
|
||||
return ret;
|
||||
return {x, y, z};
|
||||
}
|
||||
|
||||
__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
|
||||
return dim3(x, y, z);
|
||||
}
|
||||
|
||||
__device__ inline __cuda_builtin_blockDim_t::operator uint3() const {
|
||||
return {x, y, z};
|
||||
}
|
||||
|
||||
__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
|
||||
return dim3(x, y, z);
|
||||
}
|
||||
|
||||
__device__ inline __cuda_builtin_gridDim_t::operator uint3() const {
|
||||
return {x, y, z};
|
||||
}
|
||||
|
||||
#include <__clang_cuda_cmath.h>
|
||||
#include <__clang_cuda_intrinsics.h>
|
||||
#include <__clang_cuda_complex_builtins.h>
|
||||
|
Loading…
Reference in New Issue
Block a user