mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-21 06:52:10 +00:00
[OPENMP][NVPTX]Fix incompatibility of __syncthreads with LLVM, NFC.
Summary: One of the LLVM optimizations, split critical edges, also clones tail instructions. This is a dangerous operation for __syncthreads() calls, and this transformation leads to undefined behavior or incorrect results. The patch fixes this problem by replacing the __syncthreads() function with an assembler instruction, whose cost is too high and which cannot be copied. Reviewers: grokos, gtbercea, kkwli0 Subscribers: guansong, openmp-commits, caomhin Differential Revision: https://reviews.llvm.org/D56274 llvm-svn: 350333
This commit is contained in:
parent
0f67746c92
commit
3c74be8049
@ -564,7 +564,8 @@ EXTERN void __kmpc_get_team_static_memory(const void *buf, size_t size,
|
|||||||
if (GetThreadIdInBlock() == 0) {
|
if (GetThreadIdInBlock() == 0) {
|
||||||
*frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
|
*frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
|
||||||
}
|
}
|
||||||
__syncthreads();
|
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
|
||||||
|
__SYNCTHREADS();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == getMasterThreadId(),
|
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == getMasterThreadId(),
|
||||||
@ -577,7 +578,8 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t is_shared) {
|
|||||||
if (is_shared)
|
if (is_shared)
|
||||||
return;
|
return;
|
||||||
if (isSPMDMode()) {
|
if (isSPMDMode()) {
|
||||||
__syncthreads();
|
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
|
||||||
|
__SYNCTHREADS();
|
||||||
if (GetThreadIdInBlock() == 0) {
|
if (GetThreadIdInBlock() == 0) {
|
||||||
omptarget_nvptx_simpleMemoryManager.Release();
|
omptarget_nvptx_simpleMemoryManager.Release();
|
||||||
}
|
}
|
||||||
|
@ -105,7 +105,8 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
|
|||||||
omptarget_nvptx_simpleThreadPrivateContext =
|
omptarget_nvptx_simpleThreadPrivateContext =
|
||||||
omptarget_nvptx_device_simpleState[slot].Dequeue();
|
omptarget_nvptx_device_simpleState[slot].Dequeue();
|
||||||
}
|
}
|
||||||
__syncthreads();
|
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
|
||||||
|
__SYNCTHREADS();
|
||||||
omptarget_nvptx_simpleThreadPrivateContext->Init();
|
omptarget_nvptx_simpleThreadPrivateContext->Init();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -129,7 +130,8 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
|
|||||||
// init team context
|
// init team context
|
||||||
currTeamDescr.InitTeamDescr();
|
currTeamDescr.InitTeamDescr();
|
||||||
}
|
}
|
||||||
__syncthreads();
|
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
|
||||||
|
__SYNCTHREADS();
|
||||||
|
|
||||||
omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
|
omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
|
||||||
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
|
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
|
||||||
@ -170,7 +172,8 @@ EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit() {
|
|||||||
EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
|
EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
|
||||||
// We're not going to pop the task descr stack of each thread since
|
// We're not going to pop the task descr stack of each thread since
|
||||||
// there are no more parallel regions in SPMD mode.
|
// there are no more parallel regions in SPMD mode.
|
||||||
__syncthreads();
|
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
|
||||||
|
__SYNCTHREADS();
|
||||||
int threadId = GetThreadIdInBlock();
|
int threadId = GetThreadIdInBlock();
|
||||||
if (!RequiresOMPRuntime) {
|
if (!RequiresOMPRuntime) {
|
||||||
if (threadId == 0) {
|
if (threadId == 0) {
|
||||||
|
@ -63,6 +63,9 @@
|
|||||||
#define __ACTIVEMASK() __ballot(1)
|
#define __ACTIVEMASK() __ballot(1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
|
||||||
|
#define __SYNCTHREADS() __SYNCTHREADS_N(0)
|
||||||
|
|
||||||
// arguments needed for L0 parallelism only.
|
// arguments needed for L0 parallelism only.
|
||||||
class omptarget_nvptx_SharedArgs {
|
class omptarget_nvptx_SharedArgs {
|
||||||
public:
|
public:
|
||||||
|
@ -74,7 +74,8 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
|
|||||||
// parallel region and that all worker threads participate.
|
// parallel region and that all worker threads participate.
|
||||||
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
|
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
|
||||||
PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
|
PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
|
||||||
__syncthreads();
|
// FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
|
||||||
|
__SYNCTHREADS();
|
||||||
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
|
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user