mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-04 23:19:06 +00:00
[libomptarget] Refactor syncwarp macro to inline function
Summary:
[libomptarget] Refactor syncwarp macro to inline function

See also abandoned D66846, split into this diff and others.

Reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D66857
llvm-svn: 370149
This commit is contained in:
parent
e73e3013a6
commit
be3d487313
openmp/libomptarget/deviceRTLs/nvptx/src
@ -52,11 +52,8 @@
|
||||
#error CUDA_VERSION macro is undefined, something wrong with cuda.
|
||||
#elif CUDA_VERSION >= 9000
|
||||
#define __ACTIVEMASK() __activemask()
|
||||
#define __SYNCWARP(Mask) __syncwarp(Mask)
|
||||
#else
|
||||
#define __ACTIVEMASK() __ballot(1)
|
||||
// In Cuda < 9.0 no need to sync threads in warps.
|
||||
#define __SYNCWARP(Mask)
|
||||
#endif // CUDA_VERSION
|
||||
|
||||
#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
|
||||
|
@ -14,6 +14,8 @@
|
||||
// Execution Parameters
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "target_impl.h"
|
||||
|
||||
INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
|
||||
execution_param = EMode;
|
||||
execution_param |= RMode;
|
||||
@ -203,7 +205,7 @@ INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
|
||||
|
||||
INLINE void IncParallelLevel(bool ActiveParallel) {
|
||||
unsigned Active = __ACTIVEMASK();
|
||||
__SYNCWARP(Active);
|
||||
__kmpc_impl_syncwarp(Active);
|
||||
unsigned LaneMaskLt;
|
||||
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
|
||||
unsigned Rank = __popc(Active & LaneMaskLt);
|
||||
@ -212,12 +214,12 @@ INLINE void IncParallelLevel(bool ActiveParallel) {
|
||||
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
|
||||
__threadfence();
|
||||
}
|
||||
__SYNCWARP(Active);
|
||||
__kmpc_impl_syncwarp(Active);
|
||||
}
|
||||
|
||||
INLINE void DecParallelLevel(bool ActiveParallel) {
|
||||
unsigned Active = __ACTIVEMASK();
|
||||
__SYNCWARP(Active);
|
||||
__kmpc_impl_syncwarp(Active);
|
||||
unsigned LaneMaskLt;
|
||||
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
|
||||
unsigned Rank = __popc(Active & LaneMaskLt);
|
||||
@ -226,7 +228,7 @@ INLINE void DecParallelLevel(bool ActiveParallel) {
|
||||
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
|
||||
__threadfence();
|
||||
}
|
||||
__SYNCWARP(Active);
|
||||
__kmpc_impl_syncwarp(Active);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -63,6 +63,12 @@ INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
|
||||
#endif // CUDA_VERSION
|
||||
}
|
||||
|
||||
INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
|
||||
INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
|
||||
#if CUDA_VERSION >= 9000
|
||||
__syncwarp(Mask);
|
||||
#else
|
||||
// In Cuda < 9.0 no need to sync threads in warps.
|
||||
#endif // CUDA_VERSION
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user