mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-03 07:38:57 +00:00
[libomptarget] Implement wavefront functions for amdgcn
Summary: [libomptarget] Implement wavefront functions for amdgcn Reviewers: jdoerfert, ABataev, grokos, arsenm Reviewed By: arsenm Subscribers: saiislam, wdng, arsenm, jvesely, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D73077
This commit is contained in:
parent
8a1f4feb1b
commit
6a82f0f0b9
@ -12,6 +12,32 @@
|
||||
|
||||
#include "target_impl.h"
|
||||
|
||||
// Implementations initially derived from hcc
|
||||
|
||||
static DEVICE uint32_t getLaneId(void) {
|
||||
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
|
||||
}
|
||||
|
||||
// Initialized with a 64-bit mask with bits set in positions less than the
|
||||
// thread's lane number in the warp
|
||||
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
|
||||
uint32_t lane = getLaneId();
|
||||
int64_t ballot = __kmpc_impl_activemask();
|
||||
uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1;
|
||||
return mask & ballot;
|
||||
}
|
||||
|
||||
// Initialized with a 64-bit mask with bits set in positions greater than the
|
||||
// thread's lane number in the warp
|
||||
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
|
||||
uint32_t lane = getLaneId();
|
||||
if (lane == (WARPSIZE - 1))
|
||||
return 0;
|
||||
uint64_t ballot = __kmpc_impl_activemask();
|
||||
uint64_t mask = (~((uint64_t)0)) << (lane + 1);
|
||||
return mask & ballot;
|
||||
}
|
||||
|
||||
DEVICE double __kmpc_impl_get_wtick() { return ((double)1E-9); }
|
||||
|
||||
EXTERN uint64_t __clock64();
|
||||
@ -19,6 +45,28 @@ DEVICE double __kmpc_impl_get_wtime() {
|
||||
return ((double)1.0 / 745000000.0) * __clock64();
|
||||
}
|
||||
|
||||
// Warp vote function
|
||||
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
|
||||
// 33 is ICMP_NE from llvm/include/llvm/IR/InstrTypes.h
|
||||
return __builtin_amdgcn_uicmp(1, 0, 33);
|
||||
}
|
||||
|
||||
DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t var,
|
||||
int32_t srcLane) {
|
||||
int width = WARPSIZE;
|
||||
int self = getLaneId();
|
||||
int index = srcLane + (self & ~(width - 1));
|
||||
return __builtin_amdgcn_ds_bpermute(index << 2, var);
|
||||
}
|
||||
|
||||
DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
|
||||
uint32_t laneDelta, int32_t width) {
|
||||
int self = getLaneId();
|
||||
int index = self + laneDelta;
|
||||
index = (int)(laneDelta + (self & (width - 1))) >= width ? self : index;
|
||||
return __builtin_amdgcn_ds_bpermute(index << 2, var);
|
||||
}
|
||||
|
||||
EXTERN uint64_t __ockl_get_local_size(uint32_t);
|
||||
EXTERN uint64_t __ockl_get_num_groups(uint32_t);
|
||||
DEVICE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
|
||||
|
Loading…
x
Reference in New Issue
Block a user