mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-10-07 10:54:01 +00:00
[OpenMP][FIX] Move workaround code to avoid races
The workaround code ensure we always call __kmpc_kernel_parallel, but it did so in a racy manner as the initialization might not have been completed yet. To avoid introducing a sync, we move the workaround into the deinit function for now.
This commit is contained in:
parent
1f5a9d1d07
commit
0012b956f9
@ -110,20 +110,8 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
|
||||
// main thread's warp, so none of its threads can ever be active worker
|
||||
// threads.
|
||||
if (UseGenericStateMachine &&
|
||||
mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD)) {
|
||||
mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD))
|
||||
genericStateMachine(KernelEnvironment.Ident);
|
||||
} else {
|
||||
// Retrieve the work function just to ensure we always call
|
||||
// __kmpc_kernel_parallel even if a custom state machine is used.
|
||||
// TODO: this is not super pretty. The problem is we create the call to
|
||||
// __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it is
|
||||
// not there yet. Thus, we assume we never reach it from
|
||||
// __kmpc_target_deinit. That allows us to remove the store in there to
|
||||
// ParallelRegionFn, which leads to bad results later on.
|
||||
ParallelRegionFnTy WorkFn = nullptr;
|
||||
__kmpc_kernel_parallel(&WorkFn);
|
||||
ASSERT(WorkFn == nullptr, nullptr);
|
||||
}
|
||||
|
||||
return mapping::getThreadIdInBlock();
|
||||
}
|
||||
@ -140,8 +128,22 @@ void __kmpc_target_deinit() {
|
||||
if (IsSPMD)
|
||||
return;
|
||||
|
||||
// Signal the workers to exit the state machine and exit the kernel.
|
||||
state::ParallelRegionFn = nullptr;
|
||||
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
|
||||
// Signal the workers to exit the state machine and exit the kernel.
|
||||
state::ParallelRegionFn = nullptr;
|
||||
} else if (!state::getKernelEnvironment()
|
||||
.Configuration.UseGenericStateMachine) {
|
||||
// Retrieve the work function just to ensure we always call
|
||||
// __kmpc_kernel_parallel even if a custom state machine is used.
|
||||
// TODO: this is not super pretty. The problem is we create the call to
|
||||
// __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
|
||||
// is not there yet. Thus, we assume we never reach it from
|
||||
// __kmpc_target_deinit. That allows us to remove the store in there to
|
||||
// ParallelRegionFn, which leads to bad results later on.
|
||||
ParallelRegionFnTy WorkFn = nullptr;
|
||||
__kmpc_kernel_parallel(&WorkFn);
|
||||
ASSERT(WorkFn == nullptr, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
|
||||
|
Loading…
Reference in New Issue
Block a user