mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-23 07:52:06 +00:00
[OpenMP][NFC] Use AsyncInfo as the variable name for a __tgt_async_info
Reviewed By: grokos, tianshilei1992
Differential Revision: https://reviews.llvm.org/D96444
This commit is contained in:
parent
66ba494b49
commit
5449fbb5d4
@ -74,8 +74,7 @@ int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
|
||||
int64_t Size);
|
||||
|
||||
int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr,
|
||||
int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
int64_t Size, __tgt_async_info *AsyncInfo);
|
||||
|
||||
// Retrieve the data content from the target device using its address. In case
|
||||
// of success, return zero. Otherwise, return an error code.
|
||||
@ -85,7 +84,7 @@ int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
|
||||
// Asynchronous version of __tgt_rtl_data_retrieve
|
||||
int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr,
|
||||
void *TargetPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
// Copy the data content from one target device to another target device using
|
||||
// its address. This operation does not need to copy data back to host and then
|
||||
@ -97,7 +96,7 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcID, void *SrcPtr, int32_t DstID,
|
||||
// Asynchronous version of __tgt_rtl_data_exchange
|
||||
int32_t __tgt_rtl_data_exchange_async(int32_t SrcID, void *SrcPtr,
|
||||
int32_t DesID, void *DstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
// De-allocate the data referenced by target ptr on the device. In case of
|
||||
// success, return zero. Otherwise, return an error code.
|
||||
@ -106,8 +105,8 @@ int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
|
||||
// Transfer control to the offloaded entry Entry on the target device.
|
||||
// Args and Offsets are arrays of NumArgs size of target addresses and
|
||||
// offsets. An offset should be added to the target address before passing it
|
||||
// to the outlined function on device side. If AsyncInfoPtr is nullptr, it is
|
||||
// synchronous; otherwise it is asynchronous. However, AsyncInfoPtr may be
|
||||
// to the outlined function on device side. If AsyncInfo is nullptr, it is
|
||||
// synchronous; otherwise it is asynchronous. However, AsyncInfo may be
|
||||
// ignored on some platforms, like x86_64. In that case, it is synchronous. In
|
||||
// case of success, return zero. Otherwise, return an error code.
|
||||
int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
|
||||
@ -116,12 +115,12 @@ int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
|
||||
// Asynchronous version of __tgt_rtl_run_target_region
|
||||
int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
|
||||
ptrdiff_t *Offsets, int32_t NumArgs,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
// Similar to __tgt_rtl_run_target_region, but additionally specify the
|
||||
// number of teams to be created and a number of threads in each team. If
|
||||
// AsyncInfoPtr is nullptr, it is synchronous; otherwise it is asynchronous.
|
||||
// However, AsyncInfoPtr may be ignored on some platforms, like x86_64. In that
|
||||
// AsyncInfo is nullptr, it is synchronous; otherwise it is asynchronous.
|
||||
// However, AsyncInfo may be ignored on some platforms, like x86_64. In that
|
||||
// case, it is synchronous.
|
||||
int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
|
||||
ptrdiff_t *Offsets, int32_t NumArgs,
|
||||
@ -132,11 +131,11 @@ int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
|
||||
int32_t __tgt_rtl_run_target_team_region_async(
|
||||
int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
|
||||
int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
// Device synchronization. In case of success, return zero. Otherwise, return an
|
||||
// error code.
|
||||
int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfoPtr);
|
||||
int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -561,8 +561,8 @@ static RTLDeviceInfoTy DeviceInfo;
|
||||
namespace {
|
||||
|
||||
int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
|
||||
// Return success if we are not copying back to host from target.
|
||||
if (!HstPtr)
|
||||
@ -588,8 +588,8 @@ int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size,
|
||||
}
|
||||
|
||||
int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
atmi_status_t err;
|
||||
assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
|
||||
// Return success if we are not doing host to target.
|
||||
@ -622,20 +622,20 @@ int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size,
|
||||
// there are no outstanding kernels that need to be synchronized. Any async call
|
||||
// may be passed a Queue==0, at which point the cuda implementation will set it
|
||||
// to non-null (see getStream). The cuda streams are per-device. Upstream may
|
||||
// change this interface to explicitly initialize the async_info_pointer, but
|
||||
// change this interface to explicitly initialize the AsyncInfo pointer, but
|
||||
// until then hsa lazily initializes it as well.
|
||||
|
||||
void initAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
|
||||
void initAsyncInfo(__tgt_async_info *AsyncInfo) {
|
||||
// set non-null while using async calls, return to null to indicate completion
|
||||
assert(async_info_ptr);
|
||||
if (!async_info_ptr->Queue) {
|
||||
async_info_ptr->Queue = reinterpret_cast<void *>(UINT64_MAX);
|
||||
assert(AsyncInfo);
|
||||
if (!AsyncInfo->Queue) {
|
||||
AsyncInfo->Queue = reinterpret_cast<void *>(UINT64_MAX);
|
||||
}
|
||||
}
|
||||
void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
|
||||
assert(async_info_ptr);
|
||||
assert(async_info_ptr->Queue);
|
||||
async_info_ptr->Queue = 0;
|
||||
void finiAsyncInfo(__tgt_async_info *AsyncInfo) {
|
||||
assert(AsyncInfo);
|
||||
assert(AsyncInfo->Queue);
|
||||
AsyncInfo->Queue = 0;
|
||||
}
|
||||
|
||||
bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
|
||||
@ -1501,21 +1501,20 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *) {
|
||||
int32_t __tgt_rtl_data_submit(int device_id, void *tgt_ptr, void *hst_ptr,
|
||||
int64_t size) {
|
||||
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
|
||||
__tgt_async_info async_info;
|
||||
int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &async_info);
|
||||
__tgt_async_info AsyncInfo;
|
||||
int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_submit_async(int device_id, void *tgt_ptr, void *hst_ptr,
|
||||
int64_t size,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
int64_t size, __tgt_async_info *AsyncInfo) {
|
||||
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
|
||||
if (async_info_ptr) {
|
||||
initAsyncInfoPtr(async_info_ptr);
|
||||
return dataSubmit(device_id, tgt_ptr, hst_ptr, size, async_info_ptr);
|
||||
if (AsyncInfo) {
|
||||
initAsyncInfo(AsyncInfo);
|
||||
return dataSubmit(device_id, tgt_ptr, hst_ptr, size, AsyncInfo);
|
||||
} else {
|
||||
return __tgt_rtl_data_submit(device_id, tgt_ptr, hst_ptr, size);
|
||||
}
|
||||
@ -1524,21 +1523,21 @@ int32_t __tgt_rtl_data_submit_async(int device_id, void *tgt_ptr, void *hst_ptr,
|
||||
int32_t __tgt_rtl_data_retrieve(int device_id, void *hst_ptr, void *tgt_ptr,
|
||||
int64_t size) {
|
||||
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
|
||||
__tgt_async_info async_info;
|
||||
int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &async_info);
|
||||
__tgt_async_info AsyncInfo;
|
||||
int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve_async(int device_id, void *hst_ptr,
|
||||
void *tgt_ptr, int64_t size,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(async_info_ptr && "async_info is nullptr");
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
|
||||
initAsyncInfoPtr(async_info_ptr);
|
||||
return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, async_info_ptr);
|
||||
initAsyncInfo(AsyncInfo);
|
||||
return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
|
||||
@ -1922,9 +1921,9 @@ int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
|
||||
void *tgt_entry_ptr, void **tgt_args,
|
||||
ptrdiff_t *tgt_offsets,
|
||||
int32_t arg_num,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(async_info_ptr && "async_info is nullptr");
|
||||
initAsyncInfoPtr(async_info_ptr);
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
initAsyncInfo(AsyncInfo);
|
||||
|
||||
// use one team and one thread
|
||||
// fix thread num
|
||||
@ -1935,15 +1934,14 @@ int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
|
||||
thread_limit, 0);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_synchronize(int32_t device_id,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
assert(async_info_ptr && "async_info is nullptr");
|
||||
int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
|
||||
// Cuda asserts that async_info_ptr->Queue is non-null, but this invariant
|
||||
// Cuda asserts that AsyncInfo->Queue is non-null, but this invariant
|
||||
// is not ensured by devices.cpp for amdgcn
|
||||
// assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr");
|
||||
if (async_info_ptr->Queue) {
|
||||
finiAsyncInfoPtr(async_info_ptr);
|
||||
// assert(AsyncInfo->Queue && "AsyncInfo->Queue is nullptr");
|
||||
if (AsyncInfo->Queue) {
|
||||
finiAsyncInfo(AsyncInfo);
|
||||
}
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
@ -380,13 +380,13 @@ class DeviceRTLTy {
|
||||
E.Table.EntriesBegin = E.Table.EntriesEnd = nullptr;
|
||||
}
|
||||
|
||||
CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const {
|
||||
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
|
||||
CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfo) const {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
|
||||
if (!AsyncInfoPtr->Queue)
|
||||
AsyncInfoPtr->Queue = StreamManager->getStream(DeviceId);
|
||||
if (!AsyncInfo->Queue)
|
||||
AsyncInfo->Queue = StreamManager->getStream(DeviceId);
|
||||
|
||||
return reinterpret_cast<CUstream>(AsyncInfoPtr->Queue);
|
||||
return reinterpret_cast<CUstream>(AsyncInfo->Queue);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -812,14 +812,14 @@ public:
|
||||
}
|
||||
|
||||
int dataSubmit(const int DeviceId, const void *TgtPtr, const void *HstPtr,
|
||||
const int64_t Size, __tgt_async_info *AsyncInfoPtr) const {
|
||||
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
|
||||
const int64_t Size, __tgt_async_info *AsyncInfo) const {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
|
||||
CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);
|
||||
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
|
||||
CUstream Stream = getStream(DeviceId, AsyncInfo);
|
||||
|
||||
Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
@ -834,14 +834,14 @@ public:
|
||||
}
|
||||
|
||||
int dataRetrieve(const int DeviceId, void *HstPtr, const void *TgtPtr,
|
||||
const int64_t Size, __tgt_async_info *AsyncInfoPtr) const {
|
||||
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
|
||||
const int64_t Size, __tgt_async_info *AsyncInfo) const {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
|
||||
CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);
|
||||
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
|
||||
CUstream Stream = getStream(DeviceId, AsyncInfo);
|
||||
|
||||
Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
@ -856,14 +856,14 @@ public:
|
||||
}
|
||||
|
||||
int dataExchange(int SrcDevId, const void *SrcPtr, int DstDevId, void *DstPtr,
|
||||
int64_t Size, __tgt_async_info *AsyncInfoPtr) const {
|
||||
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
|
||||
int64_t Size, __tgt_async_info *AsyncInfo) const {
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
|
||||
CUresult Err = cuCtxSetCurrent(DeviceData[SrcDevId].Context);
|
||||
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
CUstream Stream = getStream(SrcDevId, AsyncInfoPtr);
|
||||
CUstream Stream = getStream(SrcDevId, AsyncInfo);
|
||||
|
||||
// If they are two devices, we try peer to peer copy first
|
||||
if (SrcDevId != DstDevId) {
|
||||
@ -1032,23 +1032,23 @@ public:
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int synchronize(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const {
|
||||
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfoPtr->Queue);
|
||||
int synchronize(const int DeviceId, __tgt_async_info *AsyncInfo) const {
|
||||
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue);
|
||||
CUresult Err = cuStreamSynchronize(Stream);
|
||||
if (Err != CUDA_SUCCESS) {
|
||||
REPORT("Error when synchronizing stream. stream = " DPxMOD
|
||||
", async info ptr = " DPxMOD "\n",
|
||||
DPxPTR(Stream), DPxPTR(AsyncInfoPtr));
|
||||
DPxPTR(Stream), DPxPTR(AsyncInfo));
|
||||
CUDA_ERR_STRING(Err);
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
// Once the stream is synchronized, return it to stream pool and reset
|
||||
// async_info. This is to make sure the synchronization only works for its
|
||||
// AsyncInfo. This is to make sure the synchronization only works for its
|
||||
// own tasks.
|
||||
StreamManager->returnStream(
|
||||
DeviceId, reinterpret_cast<CUstream>(AsyncInfoPtr->Queue));
|
||||
AsyncInfoPtr->Queue = nullptr;
|
||||
StreamManager->returnStream(DeviceId,
|
||||
reinterpret_cast<CUstream>(AsyncInfo->Queue));
|
||||
AsyncInfo->Queue = nullptr;
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
@ -1105,13 +1105,13 @@ int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
|
||||
int64_t size) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
|
||||
__tgt_async_info async_info;
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_data_submit_async(device_id, tgt_ptr, hst_ptr,
|
||||
size, &async_info);
|
||||
size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_submit_async(int32_t device_id, void *tgt_ptr,
|
||||
@ -1128,13 +1128,13 @@ int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
|
||||
int64_t size) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
|
||||
__tgt_async_info async_info;
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_data_retrieve_async(device_id, hst_ptr, tgt_ptr,
|
||||
size, &async_info);
|
||||
size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr,
|
||||
@ -1150,13 +1150,13 @@ int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr,
|
||||
int32_t __tgt_rtl_data_exchange_async(int32_t src_dev_id, void *src_ptr,
|
||||
int dst_dev_id, void *dst_ptr,
|
||||
int64_t size,
|
||||
__tgt_async_info *async_info_ptr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
|
||||
assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
|
||||
assert(async_info_ptr && "async_info_ptr is nullptr");
|
||||
assert(AsyncInfo && "AsyncInfo is nullptr");
|
||||
|
||||
return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size,
|
||||
async_info_ptr);
|
||||
AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_exchange(int32_t src_dev_id, void *src_ptr,
|
||||
@ -1165,13 +1165,13 @@ int32_t __tgt_rtl_data_exchange(int32_t src_dev_id, void *src_ptr,
|
||||
assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
|
||||
assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
|
||||
|
||||
__tgt_async_info async_info;
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_data_exchange_async(
|
||||
src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &async_info);
|
||||
src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(src_dev_id, &async_info);
|
||||
return __tgt_rtl_synchronize(src_dev_id, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
|
||||
@ -1188,14 +1188,14 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
|
||||
uint64_t loop_tripcount) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
|
||||
__tgt_async_info async_info;
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_run_target_team_region_async(
|
||||
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
|
||||
thread_limit, loop_tripcount, &async_info);
|
||||
thread_limit, loop_tripcount, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_team_region_async(
|
||||
@ -1215,13 +1215,13 @@ int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
|
||||
int32_t arg_num) {
|
||||
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
|
||||
|
||||
__tgt_async_info async_info;
|
||||
__tgt_async_info AsyncInfo;
|
||||
const int32_t rc = __tgt_rtl_run_target_region_async(
|
||||
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &async_info);
|
||||
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &AsyncInfo);
|
||||
if (rc != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
return __tgt_rtl_synchronize(device_id, &async_info);
|
||||
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
|
||||
|
@ -160,11 +160,11 @@ Status RemoteOffloadImpl::Synchronize(ServerContext *Context,
|
||||
SERVER_DBG("Synchronizing device %d (probably won't work)",
|
||||
Info->device_id());
|
||||
|
||||
void *AsyncInfoPtr = (void *)Info->queue_ptr();
|
||||
void *AsyncInfo = (void *)Info->queue_ptr();
|
||||
Reply->set_number(0);
|
||||
if (PM->Devices[Info->device_id()].RTL->synchronize)
|
||||
Reply->set_number(PM->Devices[Info->device_id()].synchronize(
|
||||
(__tgt_async_info *)AsyncInfoPtr));
|
||||
(__tgt_async_info *)AsyncInfo));
|
||||
|
||||
SERVER_DBG("Synchronized device %d", Info->device_id());
|
||||
return Status::OK;
|
||||
|
@ -251,7 +251,7 @@ __tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId,
|
||||
}
|
||||
|
||||
int64_t RemoteOffloadClient::synchronize(int32_t DeviceId,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
return remoteCall(
|
||||
/* Preprocess */
|
||||
[&](auto &RPCStatus, auto &Context) {
|
||||
@ -260,7 +260,7 @@ int64_t RemoteOffloadClient::synchronize(int32_t DeviceId,
|
||||
protobuf::Arena::CreateMessage<SynchronizeDevice>(Arena.get());
|
||||
|
||||
Info->set_device_id(DeviceId);
|
||||
Info->set_queue_ptr((uint64_t)AsyncInfoPtr);
|
||||
Info->set_queue_ptr((uint64_t)AsyncInfo);
|
||||
|
||||
CLIENT_DBG("Synchronizing device %d", DeviceId);
|
||||
RPCStatus = Stub->Synchronize(&Context, *Info, Reply);
|
||||
@ -339,7 +339,7 @@ void *RemoteOffloadClient::dataAlloc(int32_t DeviceId, int64_t Size,
|
||||
|
||||
int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
|
||||
void *HstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
|
||||
return remoteCall(
|
||||
/* Preprocess */
|
||||
@ -360,7 +360,7 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
|
||||
Request->set_tgt_ptr((uint64_t)TgtPtr);
|
||||
Request->set_start(Start);
|
||||
Request->set_size(Size);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfo);
|
||||
|
||||
CLIENT_DBG("Submitting %ld-%ld/%ld bytes async on device %d at %p",
|
||||
Start, End, Size, DeviceId, TgtPtr)
|
||||
@ -418,7 +418,7 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
|
||||
|
||||
int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
|
||||
void *TgtPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
return remoteCall(
|
||||
/* Preprocess */
|
||||
[&](auto &RPCStatus, auto &Context) {
|
||||
@ -429,7 +429,7 @@ int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
|
||||
Request->set_size(Size);
|
||||
Request->set_hst_ptr((int64_t)HstPtr);
|
||||
Request->set_tgt_ptr((int64_t)TgtPtr);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfo);
|
||||
|
||||
auto *Reply = protobuf::Arena::CreateMessage<Data>(Arena.get());
|
||||
std::unique_ptr<ClientReader<Data>> Reader(
|
||||
@ -481,7 +481,7 @@ int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
|
||||
int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
|
||||
int32_t DstDevId, void *DstPtr,
|
||||
int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
return remoteCall(
|
||||
/* Preprocess */
|
||||
[&](auto &RPCStatus, auto &Context) {
|
||||
@ -494,7 +494,7 @@ int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
|
||||
Request->set_dst_dev_id(DstDevId);
|
||||
Request->set_dst_ptr((uint64_t)DstPtr);
|
||||
Request->set_size(Size);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfo);
|
||||
|
||||
CLIENT_DBG(
|
||||
"Exchanging %ld bytes on device %d at %p for %p on device %d", Size,
|
||||
@ -547,7 +547,7 @@ int32_t RemoteOffloadClient::dataDelete(int32_t DeviceId, void *TgtPtr) {
|
||||
|
||||
int32_t RemoteOffloadClient::runTargetRegionAsync(
|
||||
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) {
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfo) {
|
||||
return remoteCall(
|
||||
/* Preprocess */
|
||||
[&](auto &RPCStatus, auto &Context) {
|
||||
@ -556,7 +556,7 @@ int32_t RemoteOffloadClient::runTargetRegionAsync(
|
||||
protobuf::Arena::CreateMessage<TargetRegionAsync>(Arena.get());
|
||||
|
||||
Request->set_device_id(DeviceId);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfo);
|
||||
|
||||
Request->set_tgt_entry_ptr(
|
||||
(uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]);
|
||||
@ -592,7 +592,7 @@ int32_t RemoteOffloadClient::runTargetRegionAsync(
|
||||
int32_t RemoteOffloadClient::runTargetTeamRegionAsync(
|
||||
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
|
||||
uint64_t LoopTripcount, __tgt_async_info *AsyncInfoPtr) {
|
||||
uint64_t LoopTripcount, __tgt_async_info *AsyncInfo) {
|
||||
return remoteCall(
|
||||
/* Preprocess */
|
||||
[&](auto &RPCStatus, auto &Context) {
|
||||
@ -601,7 +601,7 @@ int32_t RemoteOffloadClient::runTargetTeamRegionAsync(
|
||||
protobuf::Arena::CreateMessage<TargetTeamRegionAsync>(Arena.get());
|
||||
|
||||
Request->set_device_id(DeviceId);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
|
||||
Request->set_queue_ptr((uint64_t)AsyncInfo);
|
||||
|
||||
Request->set_tgt_entry_ptr(
|
||||
(uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]);
|
||||
@ -712,10 +712,10 @@ __tgt_target_table *RemoteClientManager::loadBinary(int32_t DeviceId,
|
||||
}
|
||||
|
||||
int64_t RemoteClientManager::synchronize(int32_t DeviceId,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
int32_t ClientIdx, DeviceIdx;
|
||||
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
|
||||
return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfoPtr);
|
||||
return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t RemoteClientManager::isDataExchangeable(int32_t SrcDevId,
|
||||
@ -741,49 +741,49 @@ int32_t RemoteClientManager::dataDelete(int32_t DeviceId, void *TgtPtr) {
|
||||
|
||||
int32_t RemoteClientManager::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
|
||||
void *HstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
int32_t ClientIdx, DeviceIdx;
|
||||
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
|
||||
return Clients[ClientIdx].dataSubmitAsync(DeviceIdx, TgtPtr, HstPtr, Size,
|
||||
AsyncInfoPtr);
|
||||
AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t RemoteClientManager::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
|
||||
void *TgtPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
int32_t ClientIdx, DeviceIdx;
|
||||
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
|
||||
return Clients[ClientIdx].dataRetrieveAsync(DeviceIdx, HstPtr, TgtPtr, Size,
|
||||
AsyncInfoPtr);
|
||||
AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t RemoteClientManager::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
|
||||
int32_t DstDevId, void *DstPtr,
|
||||
int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx;
|
||||
std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId);
|
||||
std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId);
|
||||
return Clients[SrcClientIdx].dataExchangeAsync(
|
||||
SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfoPtr);
|
||||
SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t RemoteClientManager::runTargetRegionAsync(
|
||||
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) {
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfo) {
|
||||
int32_t ClientIdx, DeviceIdx;
|
||||
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
|
||||
return Clients[ClientIdx].runTargetRegionAsync(
|
||||
DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfoPtr);
|
||||
DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t RemoteClientManager::runTargetTeamRegionAsync(
|
||||
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
|
||||
uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
|
||||
uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) {
|
||||
int32_t ClientIdx, DeviceIdx;
|
||||
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
|
||||
return Clients[ClientIdx].runTargetTeamRegionAsync(
|
||||
DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit,
|
||||
LoopTripCount, AsyncInfoPtr);
|
||||
LoopTripCount, AsyncInfo);
|
||||
}
|
||||
|
@ -77,30 +77,30 @@ public:
|
||||
int32_t initRequires(int64_t RequiresFlags);
|
||||
|
||||
__tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image);
|
||||
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
|
||||
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo);
|
||||
int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId);
|
||||
|
||||
void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr);
|
||||
int32_t dataDelete(int32_t DeviceId, void *TgtPtr);
|
||||
|
||||
int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr,
|
||||
int64_t Size, __tgt_async_info *AsyncInfoPtr);
|
||||
int64_t Size, __tgt_async_info *AsyncInfo);
|
||||
int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
||||
int64_t Size, __tgt_async_info *AsyncInfoPtr);
|
||||
int64_t Size, __tgt_async_info *AsyncInfo);
|
||||
|
||||
int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId,
|
||||
void *DstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr);
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfo);
|
||||
|
||||
int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum,
|
||||
int32_t ThreadLimit, uint64_t LoopTripCount,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
};
|
||||
|
||||
class RemoteClientManager {
|
||||
@ -138,30 +138,30 @@ public:
|
||||
int32_t initRequires(int64_t RequiresFlags);
|
||||
|
||||
__tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image);
|
||||
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
|
||||
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo);
|
||||
int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId);
|
||||
|
||||
void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr);
|
||||
int32_t dataDelete(int32_t DeviceId, void *TgtPtr);
|
||||
|
||||
int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr,
|
||||
int64_t Size, __tgt_async_info *AsyncInfoPtr);
|
||||
int64_t Size, __tgt_async_info *AsyncInfo);
|
||||
int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
||||
int64_t Size, __tgt_async_info *AsyncInfoPtr);
|
||||
int64_t Size, __tgt_async_info *AsyncInfo);
|
||||
|
||||
int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId,
|
||||
void *DstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
|
||||
int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr);
|
||||
int32_t ArgNum, __tgt_async_info *AsyncInfo);
|
||||
|
||||
int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum,
|
||||
int32_t ThreadLimit, uint64_t LoopTripCount,
|
||||
__tgt_async_info *AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -76,9 +76,8 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
|
||||
return Manager->loadBinary(DeviceId, (__tgt_device_image *)Image);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_synchronize(int32_t DeviceId,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
return Manager->synchronize(DeviceId, AsyncInfoPtr);
|
||||
int32_t __tgt_rtl_synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo) {
|
||||
return Manager->synchronize(DeviceId, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) {
|
||||
@ -96,8 +95,8 @@ int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
|
||||
|
||||
int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
|
||||
void *HstPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
||||
@ -107,9 +106,8 @@ int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
|
||||
void *TgtPtr, int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size,
|
||||
AsyncInfoPtr);
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
|
||||
@ -125,9 +123,9 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr,
|
||||
int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr,
|
||||
int32_t DstDevId, void *DstPtr,
|
||||
int64_t Size,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size,
|
||||
AsyncInfoPtr);
|
||||
AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
@ -140,9 +138,9 @@ int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
__tgt_async_info *AsyncInfo) {
|
||||
return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs,
|
||||
TgtOffsets, ArgNum, AsyncInfoPtr);
|
||||
TgtOffsets, ArgNum, AsyncInfo);
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
@ -158,10 +156,10 @@ int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
|
||||
int32_t __tgt_rtl_run_target_team_region_async(
|
||||
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
|
||||
uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
|
||||
uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) {
|
||||
return Manager->runTargetTeamRegionAsync(
|
||||
DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit,
|
||||
LoopTripCount, AsyncInfoPtr);
|
||||
LoopTripCount, AsyncInfo);
|
||||
}
|
||||
|
||||
// Exposed library API function
|
||||
|
@ -196,7 +196,7 @@ struct DeviceTy {
|
||||
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
|
||||
int32_t deleteData(void *TgtPtrBegin);
|
||||
|
||||
// Data transfer. When AsyncInfoPtr is nullptr, the transfer will be
|
||||
// Data transfer. When AsyncInfo is nullptr, the transfer will be
|
||||
// synchronous.
|
||||
// Copy data from host to device
|
||||
int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
|
||||
@ -215,7 +215,7 @@ struct DeviceTy {
|
||||
int32_t NumTeams, int32_t ThreadLimit,
|
||||
uint64_t LoopTripCount, AsyncInfoTy &AsyncInfo);
|
||||
|
||||
/// Synchronize device/queue/event based on \p AsyncInfoPtr and return
|
||||
/// Synchronize device/queue/event based on \p AsyncInfo and return
|
||||
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
|
||||
int32_t synchronize(AsyncInfoTy &AsyncInfo);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user