[OpenMP][NFC] Use AsyncInfo as the variable name for a __tgt_async_info

Reviewed By: grokos, tianshilei1992

Differential Revision: https://reviews.llvm.org/D96444
This commit is contained in:
Johannes Doerfert 2021-02-10 14:04:37 -06:00
parent 66ba494b49
commit 5449fbb5d4
8 changed files with 137 additions and 142 deletions

View File

@ -74,8 +74,7 @@ int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
int64_t Size);
int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr,
int64_t Size,
__tgt_async_info *AsyncInfoPtr);
int64_t Size, __tgt_async_info *AsyncInfo);
// Retrieve the data content from the target device using its address. In case
// of success, return zero. Otherwise, return an error code.
@ -85,7 +84,7 @@ int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
// Asynchronous version of __tgt_rtl_data_retrieve
int32_t __tgt_rtl_data_retrieve_async(int32_t ID, void *HostPtr,
void *TargetPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
// Copy the data content from one target device to another target device using
// its address. This operation does not need to copy data back to host and then
@ -97,7 +96,7 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcID, void *SrcPtr, int32_t DstID,
// Asynchronous version of __tgt_rtl_data_exchange
int32_t __tgt_rtl_data_exchange_async(int32_t SrcID, void *SrcPtr,
int32_t DesID, void *DstPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
// De-allocate the data referenced by target ptr on the device. In case of
// success, return zero. Otherwise, return an error code.
@ -106,8 +105,8 @@ int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
// Transfer control to the offloaded entry Entry on the target device.
// Args and Offsets are arrays of NumArgs size of target addresses and
// offsets. An offset should be added to the target address before passing it
// to the outlined function on device side. If AsyncInfoPtr is nullptr, it is
// synchronous; otherwise it is asynchronous. However, AsyncInfoPtr may be
// to the outlined function on device side. If AsyncInfo is nullptr, it is
// synchronous; otherwise it is asynchronous. However, AsyncInfo may be
// ignored on some platforms, like x86_64. In that case, it is synchronous. In
// case of success, return zero. Otherwise, return an error code.
int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
@ -116,12 +115,12 @@ int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
// Asynchronous version of __tgt_rtl_run_target_region
int32_t __tgt_rtl_run_target_region_async(int32_t ID, void *Entry, void **Args,
ptrdiff_t *Offsets, int32_t NumArgs,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
// Similar to __tgt_rtl_run_target_region, but additionally specify the
// number of teams to be created and a number of threads in each team. If
// AsyncInfoPtr is nullptr, it is synchronous; otherwise it is asynchronous.
// However, AsyncInfoPtr may be ignored on some platforms, like x86_64. In that
// AsyncInfo is nullptr, it is synchronous; otherwise it is asynchronous.
// However, AsyncInfo may be ignored on some platforms, like x86_64. In that
// case, it is synchronous.
int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
ptrdiff_t *Offsets, int32_t NumArgs,
@ -132,11 +131,11 @@ int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
int32_t __tgt_rtl_run_target_team_region_async(
int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
// Device synchronization. In case of success, return zero. Otherwise, return an
// error code.
int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfoPtr);
int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
#ifdef __cplusplus
}

View File

@ -561,8 +561,8 @@ static RTLDeviceInfoTy DeviceInfo;
namespace {
int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
__tgt_async_info *AsyncInfo) {
assert(AsyncInfo && "AsyncInfo is nullptr");
assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
// Return success if we are not copying back to host from target.
if (!HstPtr)
@ -588,8 +588,8 @@ int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size,
}
int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
__tgt_async_info *AsyncInfo) {
assert(AsyncInfo && "AsyncInfo is nullptr");
atmi_status_t err;
assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
// Return success if we are not doing host to target.
@ -622,20 +622,20 @@ int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size,
// there are no outstanding kernels that need to be synchronized. Any async call
// may be passed a Queue==0, at which point the cuda implementation will set it
// to non-null (see getStream). The cuda streams are per-device. Upstream may
// change this interface to explicitly initialize the async_info_pointer, but
// change this interface to explicitly initialize the AsyncInfo pointer, but
// until then hsa lazily initializes it as well.
void initAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
void initAsyncInfo(__tgt_async_info *AsyncInfo) {
// set non-null while using async calls, return to null to indicate completion
assert(async_info_ptr);
if (!async_info_ptr->Queue) {
async_info_ptr->Queue = reinterpret_cast<void *>(UINT64_MAX);
assert(AsyncInfo);
if (!AsyncInfo->Queue) {
AsyncInfo->Queue = reinterpret_cast<void *>(UINT64_MAX);
}
}
void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
assert(async_info_ptr);
assert(async_info_ptr->Queue);
async_info_ptr->Queue = 0;
void finiAsyncInfo(__tgt_async_info *AsyncInfo) {
assert(AsyncInfo);
assert(AsyncInfo->Queue);
AsyncInfo->Queue = 0;
}
bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
@ -1501,21 +1501,20 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *) {
int32_t __tgt_rtl_data_submit(int device_id, void *tgt_ptr, void *hst_ptr,
int64_t size) {
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
__tgt_async_info async_info;
int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &async_info);
__tgt_async_info AsyncInfo;
int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
return __tgt_rtl_synchronize(device_id, &async_info);
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
}
int32_t __tgt_rtl_data_submit_async(int device_id, void *tgt_ptr, void *hst_ptr,
int64_t size,
__tgt_async_info *async_info_ptr) {
int64_t size, __tgt_async_info *AsyncInfo) {
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
if (async_info_ptr) {
initAsyncInfoPtr(async_info_ptr);
return dataSubmit(device_id, tgt_ptr, hst_ptr, size, async_info_ptr);
if (AsyncInfo) {
initAsyncInfo(AsyncInfo);
return dataSubmit(device_id, tgt_ptr, hst_ptr, size, AsyncInfo);
} else {
return __tgt_rtl_data_submit(device_id, tgt_ptr, hst_ptr, size);
}
@ -1524,21 +1523,21 @@ int32_t __tgt_rtl_data_submit_async(int device_id, void *tgt_ptr, void *hst_ptr,
int32_t __tgt_rtl_data_retrieve(int device_id, void *hst_ptr, void *tgt_ptr,
int64_t size) {
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
__tgt_async_info async_info;
int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &async_info);
__tgt_async_info AsyncInfo;
int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
return __tgt_rtl_synchronize(device_id, &async_info);
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
}
int32_t __tgt_rtl_data_retrieve_async(int device_id, void *hst_ptr,
void *tgt_ptr, int64_t size,
__tgt_async_info *async_info_ptr) {
assert(async_info_ptr && "async_info is nullptr");
__tgt_async_info *AsyncInfo) {
assert(AsyncInfo && "AsyncInfo is nullptr");
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
initAsyncInfoPtr(async_info_ptr);
return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, async_info_ptr);
initAsyncInfo(AsyncInfo);
return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, AsyncInfo);
}
int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
@ -1922,9 +1921,9 @@ int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
void *tgt_entry_ptr, void **tgt_args,
ptrdiff_t *tgt_offsets,
int32_t arg_num,
__tgt_async_info *async_info_ptr) {
assert(async_info_ptr && "async_info is nullptr");
initAsyncInfoPtr(async_info_ptr);
__tgt_async_info *AsyncInfo) {
assert(AsyncInfo && "AsyncInfo is nullptr");
initAsyncInfo(AsyncInfo);
// use one team and one thread
// fix thread num
@ -1935,15 +1934,14 @@ int32_t __tgt_rtl_run_target_region_async(int32_t device_id,
thread_limit, 0);
}
int32_t __tgt_rtl_synchronize(int32_t device_id,
__tgt_async_info *async_info_ptr) {
assert(async_info_ptr && "async_info is nullptr");
int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {
assert(AsyncInfo && "AsyncInfo is nullptr");
// Cuda asserts that async_info_ptr->Queue is non-null, but this invariant
// Cuda asserts that AsyncInfo->Queue is non-null, but this invariant
// is not ensured by devices.cpp for amdgcn
// assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr");
if (async_info_ptr->Queue) {
finiAsyncInfoPtr(async_info_ptr);
// assert(AsyncInfo->Queue && "AsyncInfo->Queue is nullptr");
if (AsyncInfo->Queue) {
finiAsyncInfo(AsyncInfo);
}
return OFFLOAD_SUCCESS;
}

View File

@ -380,13 +380,13 @@ class DeviceRTLTy {
E.Table.EntriesBegin = E.Table.EntriesEnd = nullptr;
}
CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const {
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfo) const {
assert(AsyncInfo && "AsyncInfo is nullptr");
if (!AsyncInfoPtr->Queue)
AsyncInfoPtr->Queue = StreamManager->getStream(DeviceId);
if (!AsyncInfo->Queue)
AsyncInfo->Queue = StreamManager->getStream(DeviceId);
return reinterpret_cast<CUstream>(AsyncInfoPtr->Queue);
return reinterpret_cast<CUstream>(AsyncInfo->Queue);
}
public:
@ -812,14 +812,14 @@ public:
}
int dataSubmit(const int DeviceId, const void *TgtPtr, const void *HstPtr,
const int64_t Size, __tgt_async_info *AsyncInfoPtr) const {
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
const int64_t Size, __tgt_async_info *AsyncInfo) const {
assert(AsyncInfo && "AsyncInfo is nullptr");
CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
return OFFLOAD_FAIL;
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
CUstream Stream = getStream(DeviceId, AsyncInfo);
Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
if (Err != CUDA_SUCCESS) {
@ -834,14 +834,14 @@ public:
}
int dataRetrieve(const int DeviceId, void *HstPtr, const void *TgtPtr,
const int64_t Size, __tgt_async_info *AsyncInfoPtr) const {
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
const int64_t Size, __tgt_async_info *AsyncInfo) const {
assert(AsyncInfo && "AsyncInfo is nullptr");
CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
return OFFLOAD_FAIL;
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
CUstream Stream = getStream(DeviceId, AsyncInfo);
Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
if (Err != CUDA_SUCCESS) {
@ -856,14 +856,14 @@ public:
}
int dataExchange(int SrcDevId, const void *SrcPtr, int DstDevId, void *DstPtr,
int64_t Size, __tgt_async_info *AsyncInfoPtr) const {
assert(AsyncInfoPtr && "AsyncInfoPtr is nullptr");
int64_t Size, __tgt_async_info *AsyncInfo) const {
assert(AsyncInfo && "AsyncInfo is nullptr");
CUresult Err = cuCtxSetCurrent(DeviceData[SrcDevId].Context);
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
return OFFLOAD_FAIL;
CUstream Stream = getStream(SrcDevId, AsyncInfoPtr);
CUstream Stream = getStream(SrcDevId, AsyncInfo);
// If they are two devices, we try peer to peer copy first
if (SrcDevId != DstDevId) {
@ -1032,23 +1032,23 @@ public:
return OFFLOAD_SUCCESS;
}
int synchronize(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const {
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfoPtr->Queue);
int synchronize(const int DeviceId, __tgt_async_info *AsyncInfo) const {
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue);
CUresult Err = cuStreamSynchronize(Stream);
if (Err != CUDA_SUCCESS) {
REPORT("Error when synchronizing stream. stream = " DPxMOD
", async info ptr = " DPxMOD "\n",
DPxPTR(Stream), DPxPTR(AsyncInfoPtr));
DPxPTR(Stream), DPxPTR(AsyncInfo));
CUDA_ERR_STRING(Err);
return OFFLOAD_FAIL;
}
// Once the stream is synchronized, return it to the stream pool and reset
// async_info. This is to make sure the synchronization only works for its
// AsyncInfo. This is to make sure the synchronization only works for its
// own tasks.
StreamManager->returnStream(
DeviceId, reinterpret_cast<CUstream>(AsyncInfoPtr->Queue));
AsyncInfoPtr->Queue = nullptr;
StreamManager->returnStream(DeviceId,
reinterpret_cast<CUstream>(AsyncInfo->Queue));
AsyncInfo->Queue = nullptr;
return OFFLOAD_SUCCESS;
}
@ -1105,13 +1105,13 @@ int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
int64_t size) {
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
__tgt_async_info async_info;
__tgt_async_info AsyncInfo;
const int32_t rc = __tgt_rtl_data_submit_async(device_id, tgt_ptr, hst_ptr,
size, &async_info);
size, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
return __tgt_rtl_synchronize(device_id, &async_info);
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
}
int32_t __tgt_rtl_data_submit_async(int32_t device_id, void *tgt_ptr,
@ -1128,13 +1128,13 @@ int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
int64_t size) {
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
__tgt_async_info async_info;
__tgt_async_info AsyncInfo;
const int32_t rc = __tgt_rtl_data_retrieve_async(device_id, hst_ptr, tgt_ptr,
size, &async_info);
size, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
return __tgt_rtl_synchronize(device_id, &async_info);
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
}
int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr,
@ -1150,13 +1150,13 @@ int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr,
int32_t __tgt_rtl_data_exchange_async(int32_t src_dev_id, void *src_ptr,
int dst_dev_id, void *dst_ptr,
int64_t size,
__tgt_async_info *async_info_ptr) {
__tgt_async_info *AsyncInfo) {
assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
assert(async_info_ptr && "async_info_ptr is nullptr");
assert(AsyncInfo && "AsyncInfo is nullptr");
return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size,
async_info_ptr);
AsyncInfo);
}
int32_t __tgt_rtl_data_exchange(int32_t src_dev_id, void *src_ptr,
@ -1165,13 +1165,13 @@ int32_t __tgt_rtl_data_exchange(int32_t src_dev_id, void *src_ptr,
assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
__tgt_async_info async_info;
__tgt_async_info AsyncInfo;
const int32_t rc = __tgt_rtl_data_exchange_async(
src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &async_info);
src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
return __tgt_rtl_synchronize(src_dev_id, &async_info);
return __tgt_rtl_synchronize(src_dev_id, &AsyncInfo);
}
int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
@ -1188,14 +1188,14 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
uint64_t loop_tripcount) {
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
__tgt_async_info async_info;
__tgt_async_info AsyncInfo;
const int32_t rc = __tgt_rtl_run_target_team_region_async(
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num,
thread_limit, loop_tripcount, &async_info);
thread_limit, loop_tripcount, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
return __tgt_rtl_synchronize(device_id, &async_info);
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
}
int32_t __tgt_rtl_run_target_team_region_async(
@ -1215,13 +1215,13 @@ int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
int32_t arg_num) {
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
__tgt_async_info async_info;
__tgt_async_info AsyncInfo;
const int32_t rc = __tgt_rtl_run_target_region_async(
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &async_info);
device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
return __tgt_rtl_synchronize(device_id, &async_info);
return __tgt_rtl_synchronize(device_id, &AsyncInfo);
}
int32_t __tgt_rtl_run_target_region_async(int32_t device_id,

View File

@ -160,11 +160,11 @@ Status RemoteOffloadImpl::Synchronize(ServerContext *Context,
SERVER_DBG("Synchronizing device %d (probably won't work)",
Info->device_id());
void *AsyncInfoPtr = (void *)Info->queue_ptr();
void *AsyncInfo = (void *)Info->queue_ptr();
Reply->set_number(0);
if (PM->Devices[Info->device_id()].RTL->synchronize)
Reply->set_number(PM->Devices[Info->device_id()].synchronize(
(__tgt_async_info *)AsyncInfoPtr));
(__tgt_async_info *)AsyncInfo));
SERVER_DBG("Synchronized device %d", Info->device_id());
return Status::OK;

View File

@ -251,7 +251,7 @@ __tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId,
}
int64_t RemoteOffloadClient::synchronize(int32_t DeviceId,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
return remoteCall(
/* Preprocess */
[&](auto &RPCStatus, auto &Context) {
@ -260,7 +260,7 @@ int64_t RemoteOffloadClient::synchronize(int32_t DeviceId,
protobuf::Arena::CreateMessage<SynchronizeDevice>(Arena.get());
Info->set_device_id(DeviceId);
Info->set_queue_ptr((uint64_t)AsyncInfoPtr);
Info->set_queue_ptr((uint64_t)AsyncInfo);
CLIENT_DBG("Synchronizing device %d", DeviceId);
RPCStatus = Stub->Synchronize(&Context, *Info, Reply);
@ -339,7 +339,7 @@ void *RemoteOffloadClient::dataAlloc(int32_t DeviceId, int64_t Size,
int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
void *HstPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
return remoteCall(
/* Preprocess */
@ -360,7 +360,7 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
Request->set_tgt_ptr((uint64_t)TgtPtr);
Request->set_start(Start);
Request->set_size(Size);
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
Request->set_queue_ptr((uint64_t)AsyncInfo);
CLIENT_DBG("Submitting %ld-%ld/%ld bytes async on device %d at %p",
Start, End, Size, DeviceId, TgtPtr)
@ -418,7 +418,7 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
void *TgtPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
return remoteCall(
/* Preprocess */
[&](auto &RPCStatus, auto &Context) {
@ -429,7 +429,7 @@ int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
Request->set_size(Size);
Request->set_hst_ptr((int64_t)HstPtr);
Request->set_tgt_ptr((int64_t)TgtPtr);
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
Request->set_queue_ptr((uint64_t)AsyncInfo);
auto *Reply = protobuf::Arena::CreateMessage<Data>(Arena.get());
std::unique_ptr<ClientReader<Data>> Reader(
@ -481,7 +481,7 @@ int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
int32_t DstDevId, void *DstPtr,
int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
return remoteCall(
/* Preprocess */
[&](auto &RPCStatus, auto &Context) {
@ -494,7 +494,7 @@ int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
Request->set_dst_dev_id(DstDevId);
Request->set_dst_ptr((uint64_t)DstPtr);
Request->set_size(Size);
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
Request->set_queue_ptr((uint64_t)AsyncInfo);
CLIENT_DBG(
"Exchanging %ld bytes on device %d at %p for %p on device %d", Size,
@ -547,7 +547,7 @@ int32_t RemoteOffloadClient::dataDelete(int32_t DeviceId, void *TgtPtr) {
int32_t RemoteOffloadClient::runTargetRegionAsync(
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) {
int32_t ArgNum, __tgt_async_info *AsyncInfo) {
return remoteCall(
/* Preprocess */
[&](auto &RPCStatus, auto &Context) {
@ -556,7 +556,7 @@ int32_t RemoteOffloadClient::runTargetRegionAsync(
protobuf::Arena::CreateMessage<TargetRegionAsync>(Arena.get());
Request->set_device_id(DeviceId);
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
Request->set_queue_ptr((uint64_t)AsyncInfo);
Request->set_tgt_entry_ptr(
(uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]);
@ -592,7 +592,7 @@ int32_t RemoteOffloadClient::runTargetRegionAsync(
int32_t RemoteOffloadClient::runTargetTeamRegionAsync(
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
uint64_t LoopTripcount, __tgt_async_info *AsyncInfoPtr) {
uint64_t LoopTripcount, __tgt_async_info *AsyncInfo) {
return remoteCall(
/* Preprocess */
[&](auto &RPCStatus, auto &Context) {
@ -601,7 +601,7 @@ int32_t RemoteOffloadClient::runTargetTeamRegionAsync(
protobuf::Arena::CreateMessage<TargetTeamRegionAsync>(Arena.get());
Request->set_device_id(DeviceId);
Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
Request->set_queue_ptr((uint64_t)AsyncInfo);
Request->set_tgt_entry_ptr(
(uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]);
@ -712,10 +712,10 @@ __tgt_target_table *RemoteClientManager::loadBinary(int32_t DeviceId,
}
int64_t RemoteClientManager::synchronize(int32_t DeviceId,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
int32_t ClientIdx, DeviceIdx;
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfoPtr);
return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfo);
}
int32_t RemoteClientManager::isDataExchangeable(int32_t SrcDevId,
@ -741,49 +741,49 @@ int32_t RemoteClientManager::dataDelete(int32_t DeviceId, void *TgtPtr) {
int32_t RemoteClientManager::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
void *HstPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
int32_t ClientIdx, DeviceIdx;
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
return Clients[ClientIdx].dataSubmitAsync(DeviceIdx, TgtPtr, HstPtr, Size,
AsyncInfoPtr);
AsyncInfo);
}
int32_t RemoteClientManager::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
void *TgtPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
int32_t ClientIdx, DeviceIdx;
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
return Clients[ClientIdx].dataRetrieveAsync(DeviceIdx, HstPtr, TgtPtr, Size,
AsyncInfoPtr);
AsyncInfo);
}
int32_t RemoteClientManager::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
int32_t DstDevId, void *DstPtr,
int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx;
std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId);
std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId);
return Clients[SrcClientIdx].dataExchangeAsync(
SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfoPtr);
SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfo);
}
int32_t RemoteClientManager::runTargetRegionAsync(
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) {
int32_t ArgNum, __tgt_async_info *AsyncInfo) {
int32_t ClientIdx, DeviceIdx;
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
return Clients[ClientIdx].runTargetRegionAsync(
DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfoPtr);
DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfo);
}
int32_t RemoteClientManager::runTargetTeamRegionAsync(
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) {
int32_t ClientIdx, DeviceIdx;
std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
return Clients[ClientIdx].runTargetTeamRegionAsync(
DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit,
LoopTripCount, AsyncInfoPtr);
LoopTripCount, AsyncInfo);
}

View File

@ -77,30 +77,30 @@ public:
int32_t initRequires(int64_t RequiresFlags);
__tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image);
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo);
int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId);
void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr);
int32_t dataDelete(int32_t DeviceId, void *TgtPtr);
int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr,
int64_t Size, __tgt_async_info *AsyncInfoPtr);
int64_t Size, __tgt_async_info *AsyncInfo);
int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr,
int64_t Size, __tgt_async_info *AsyncInfoPtr);
int64_t Size, __tgt_async_info *AsyncInfo);
int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId,
void *DstPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr);
int32_t ArgNum, __tgt_async_info *AsyncInfo);
int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, int32_t TeamNum,
int32_t ThreadLimit, uint64_t LoopTripCount,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
};
class RemoteClientManager {
@ -138,30 +138,30 @@ public:
int32_t initRequires(int64_t RequiresFlags);
__tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image);
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo);
int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId);
void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr);
int32_t dataDelete(int32_t DeviceId, void *TgtPtr);
int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr,
int64_t Size, __tgt_async_info *AsyncInfoPtr);
int64_t Size, __tgt_async_info *AsyncInfo);
int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr,
int64_t Size, __tgt_async_info *AsyncInfoPtr);
int64_t Size, __tgt_async_info *AsyncInfo);
int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId,
void *DstPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, __tgt_async_info *AsyncInfoPtr);
int32_t ArgNum, __tgt_async_info *AsyncInfo);
int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, int32_t TeamNum,
int32_t ThreadLimit, uint64_t LoopTripCount,
__tgt_async_info *AsyncInfoPtr);
__tgt_async_info *AsyncInfo);
};
#endif

View File

@ -76,9 +76,8 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
return Manager->loadBinary(DeviceId, (__tgt_device_image *)Image);
}
int32_t __tgt_rtl_synchronize(int32_t DeviceId,
__tgt_async_info *AsyncInfoPtr) {
return Manager->synchronize(DeviceId, AsyncInfoPtr);
int32_t __tgt_rtl_synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo) {
return Manager->synchronize(DeviceId, AsyncInfo);
}
int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) {
@ -96,8 +95,8 @@ int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
void *HstPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfoPtr);
__tgt_async_info *AsyncInfo) {
return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfo);
}
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
@ -107,9 +106,8 @@ int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
void *TgtPtr, int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size,
AsyncInfoPtr);
__tgt_async_info *AsyncInfo) {
return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, AsyncInfo);
}
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
@ -125,9 +123,9 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr,
int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr,
int32_t DstDevId, void *DstPtr,
int64_t Size,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size,
AsyncInfoPtr);
AsyncInfo);
}
int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
@ -140,9 +138,9 @@ int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum,
__tgt_async_info *AsyncInfoPtr) {
__tgt_async_info *AsyncInfo) {
return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs,
TgtOffsets, ArgNum, AsyncInfoPtr);
TgtOffsets, ArgNum, AsyncInfo);
}
int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
@ -158,10 +156,10 @@ int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
int32_t __tgt_rtl_run_target_team_region_async(
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) {
return Manager->runTargetTeamRegionAsync(
DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit,
LoopTripCount, AsyncInfoPtr);
LoopTripCount, AsyncInfo);
}
// Exposed library API function

View File

@ -196,7 +196,7 @@ struct DeviceTy {
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
int32_t deleteData(void *TgtPtrBegin);
// Data transfer. When AsyncInfoPtr is nullptr, the transfer will be
// Data transfer. When AsyncInfo is nullptr, the transfer will be
// synchronous.
// Copy data from host to device
int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
@ -215,7 +215,7 @@ struct DeviceTy {
int32_t NumTeams, int32_t ThreadLimit,
uint64_t LoopTripCount, AsyncInfoTy &AsyncInfo);
/// Synchronize device/queue/event based on \p AsyncInfoPtr and return
/// Synchronize device/queue/event based on \p AsyncInfo and return
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
int32_t synchronize(AsyncInfoTy &AsyncInfo);