mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-29 21:24:04 +00:00
Implement mem_fence on ptx
PTX does not differentiate between read and write fences. Hence, these are lowered to a mem_fence call. The mem_fence function compiles to the “membar.cta” instruction, which commits all outstanding reads and writes of a thread such that these become visible to all other threads in the same CTA (i.e., work-group). The instruction does not differentiate between global and local memory. Hence, the flags parameter is ignored, except for deciding whether a “membar.cta” instruction should be issued at all. Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu> llvm-svn: 315235
This commit is contained in:
parent
492d7134f3
commit
1364d268a4
@ -1,3 +1,4 @@
|
||||
mem_fence/fence.cl
|
||||
synchronization/barrier.cl
|
||||
workitem/get_global_id.cl
|
||||
workitem/get_group_id.cl
|
||||
|
15
libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
Normal file
15
libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
Normal file
@ -0,0 +1,15 @@
|
||||
#include <clc/clc.h>
|
||||
|
||||
// Memory fence for the PTX target.
//
// flags: bitwise OR of CLK_LOCAL_MEM_FENCE and/or CLK_GLOBAL_MEM_FENCE.
//
// The flags are only tested for "is any fence bit set"; the same
// __nvvm_membar_cta() call is issued whichever of the two bits is present.
// When neither bit is set, no fence is emitted.
_CLC_DEF void mem_fence(cl_mem_fence_flags flags) {
  // No fence requested: return without emitting a barrier.
  if (!(flags & (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))) {
    return;
  }

  __nvvm_membar_cta();
}
|
||||
|
||||
// We do not have separate mechanisms for read and write fences.
|
||||
// Read fence: forwards to mem_fence() with the caller's flags unchanged, so it
// behaves exactly like a full mem_fence() on this target.
//
// flags: passed through to mem_fence() as-is.
_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) {
  mem_fence(flags);
}
|
||||
|
||||
// Write fence: forwards to mem_fence() with the caller's flags unchanged, so it
// behaves exactly like a full mem_fence() on this target.
//
// flags: passed through to mem_fence() as-is.
_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) {
  mem_fence(flags);
}
|
Loading…
x
Reference in New Issue
Block a user