[OpenMP][libomptarget] Add support for unified memory for regular maps

Summary:
This patch adds support for using unified memory in the case of regular maps that happen when a target region is offloaded to the device.

For cases where only a single version of the data is required then the host address can be used. When variables need to be privatized in any way or globalized, then the copy to the device is still required for correctness.

Reviewers: ABataev, jdoerfert, Hahnfeld, AlexEichenberger, caomhin, grokos

Reviewed By: Hahnfeld

Subscribers: mgorny, guansong, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D65001

llvm-svn: 368192
This commit is contained in:
Gheorghe-Teodor Bercea 2019-08-07 17:29:45 +00:00
parent d8c3c17394
commit a1d20506e7
7 changed files with 383 additions and 47 deletions

View File

@ -113,7 +113,15 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
DeviceTy& Device = Devices[device_num]; DeviceTy& Device = Devices[device_num];
bool IsLast; // not used bool IsLast; // not used
int rc = (Device.getTgtPtrBegin(ptr, 0, IsLast, false) != NULL); bool IsHostPtr;
void *TgtPtr = Device.getTgtPtrBegin(ptr, 0, IsLast, false, IsHostPtr);
int rc = (TgtPtr != NULL);
// Under unified memory the host pointer can be returned by the
// getTgtPtrBegin() function which means that there is no device
// corresponding point for ptr. This function should return false
// in that situation.
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
rc = !IsHostPtr;
DP("Call to omp_target_is_present returns %d\n", rc); DP("Call to omp_target_is_present returns %d\n", rc);
return rc; return rc;
} }

View File

@ -157,12 +157,17 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
// If NULL is returned, then either data allocation failed or the user tried // If NULL is returned, then either data allocation failed or the user tried
// to do an illegal mapping. // to do an illegal mapping.
void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
int64_t Size, bool &IsNew, bool IsImplicit, bool UpdateRefCount) { int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
bool UpdateRefCount) {
void *rc = NULL; void *rc = NULL;
IsHostPtr = false;
DataMapMtx.lock(); DataMapMtx.lock();
LookupResult lr = lookupMapping(HstPtrBegin, Size); LookupResult lr = lookupMapping(HstPtrBegin, Size);
// Check if the pointer is contained. // Check if the pointer is contained.
// If a variable is mapped to the device manually by the user - which would
// lead to the IsContained flag to be true - then we must ensure that the
// device address is returned even under unified memory conditions.
if (lr.Flags.IsContained || if (lr.Flags.IsContained ||
((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) { ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
auto &HT = *lr.Entry; auto &HT = *lr.Entry;
@ -183,15 +188,28 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
// Explicit extension of mapped data - not allowed. // Explicit extension of mapped data - not allowed.
DP("Explicit extension of mapping is not allowed.\n"); DP("Explicit extension of mapping is not allowed.\n");
} else if (Size) { } else if (Size) {
// If it is not contained and Size > 0 we should create a new entry for it. // If unified shared memory is active, implicitly mapped variables that are not
IsNew = true; // privatized use host address. Any explicitly mapped variables also use
uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin); // host address where correctness is not impeded. In all other cases
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", " // maps are respected.
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase), // TODO: In addition to the mapping rules above, when the close map
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp)); // modifier is implemented, foce the mapping of the variable to the device.
HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase, if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
(uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp)); DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
rc = (void *)tp; DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
IsHostPtr = true;
rc = HstPtrBegin;
} else {
// If it is not contained and Size > 0 we should create a new entry for it.
IsNew = true;
uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
(uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
rc = (void *)tp;
}
} }
DataMapMtx.unlock(); DataMapMtx.unlock();
@ -202,8 +220,10 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
// Return the target pointer begin (where the data will be moved). // Return the target pointer begin (where the data will be moved).
// Decrement the reference counter if called from target_data_end. // Decrement the reference counter if called from target_data_end.
void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
bool UpdateRefCount) { bool UpdateRefCount, bool &IsHostPtr) {
void *rc = NULL; void *rc = NULL;
IsHostPtr = false;
IsLast = false;
DataMapMtx.lock(); DataMapMtx.lock();
LookupResult lr = lookupMapping(HstPtrBegin, Size); LookupResult lr = lookupMapping(HstPtrBegin, Size);
@ -221,8 +241,14 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
(CONSIDERED_INF(HT.RefCount)) ? "INF" : (CONSIDERED_INF(HT.RefCount)) ? "INF" :
std::to_string(HT.RefCount).c_str()); std::to_string(HT.RefCount).c_str());
rc = (void *)tp; rc = (void *)tp;
} else { } else if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
IsLast = false; // If the value isn't found in the mapping and unified shared memory
// is on then it means we have stumbled upon a value which we need to
// use directly from the host.
DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
IsHostPtr = true;
rc = HstPtrBegin;
} }
DataMapMtx.unlock(); DataMapMtx.unlock();
@ -244,6 +270,8 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
} }
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) { int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
return OFFLOAD_SUCCESS;
// Check if the pointer is contained in any sub-nodes. // Check if the pointer is contained in any sub-nodes.
int rc; int rc;
DataMapMtx.lock(); DataMapMtx.lock();

View File

@ -137,10 +137,10 @@ struct DeviceTy {
long getMapEntryRefCnt(void *HstPtrBegin); long getMapEntryRefCnt(void *HstPtrBegin);
LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
bool &IsNew, bool IsImplicit, bool UpdateRefCount = true); bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true);
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
bool UpdateRefCount); bool UpdateRefCount, bool &IsHostPtr);
int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete); int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete);
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
int disassociatePtr(void *HstPtrBegin); int disassociatePtr(void *HstPtrBegin);

View File

@ -242,6 +242,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
// Address of pointer on the host and device, respectively. // Address of pointer on the host and device, respectively.
void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin; void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
bool IsNew, Pointer_IsNew; bool IsNew, Pointer_IsNew;
bool IsHostPtr = false;
bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
// UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
// have reached this point via __tgt_target_data_begin and not __tgt_target // have reached this point via __tgt_target_data_begin and not __tgt_target
@ -253,7 +254,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
DP("Has a pointer entry: \n"); DP("Has a pointer entry: \n");
// base is address of pointer. // base is address of pointer.
Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase, Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef); sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef);
if (!Pointer_TgtPtrBegin) { if (!Pointer_TgtPtrBegin) {
DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
"illegal mapping).\n"); "illegal mapping).\n");
@ -269,7 +270,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
} }
void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
data_size, IsNew, IsImplicit, UpdateRef); data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef);
if (!TgtPtrBegin && data_size) { if (!TgtPtrBegin && data_size) {
// If data_size==0, then the argument could be a zero-length pointer to // If data_size==0, then the argument could be a zero-length pointer to
// NULL, so getOrAlloc() returning NULL is not an error. // NULL, so getOrAlloc() returning NULL is not an error.
@ -289,19 +290,21 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
bool copy = false; bool copy = false;
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
copy = true; if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
// Copy data only if the "parent" struct has RefCount==1.
int32_t parent_idx = member_of(arg_types[i]);
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
assert(parent_rc > 0 && "parent struct not found");
if (parent_rc == 1) {
copy = true; copy = true;
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
// Copy data only if the "parent" struct has RefCount==1.
int32_t parent_idx = member_of(arg_types[i]);
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
assert(parent_rc > 0 && "parent struct not found");
if (parent_rc == 1) {
copy = true;
}
} }
} }
if (copy) { if (copy && !IsHostPtr) {
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size); int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
@ -312,7 +315,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
} }
} }
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
@ -363,14 +366,14 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
} }
} }
bool IsLast; bool IsLast, IsHostPtr;
bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) || bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ); (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE; bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
// If PTR_AND_OBJ, HstPtrBegin is address of pointee // If PTR_AND_OBJ, HstPtrBegin is address of pointee
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast, void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
UpdateRef); UpdateRef, IsHostPtr);
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
" - is%s last\n", data_size, DPxPTR(TgtPtrBegin), " - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
(IsLast ? "" : " not")); (IsLast ? "" : " not"));
@ -387,18 +390,22 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
bool CopyMember = false; bool CopyMember = false;
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
// Copy data only if the "parent" struct has RefCount==1. !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
int32_t parent_idx = member_of(arg_types[i]); // Copy data only if the "parent" struct has RefCount==1.
long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]); int32_t parent_idx = member_of(arg_types[i]);
assert(parent_rc > 0 && "parent struct not found"); long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
if (parent_rc == 1) { assert(parent_rc > 0 && "parent struct not found");
CopyMember = true; if (parent_rc == 1) {
CopyMember = true;
}
} }
} }
if (DelEntry || Always || CopyMember) { if ((DelEntry || Always || CopyMember) &&
!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin)) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
@ -471,14 +478,21 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
void *HstPtrBegin = args[i]; void *HstPtrBegin = args[i];
int64_t MapSize = arg_sizes[i]; int64_t MapSize = arg_sizes[i];
bool IsLast; bool IsLast, IsHostPtr;
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
false); false, IsHostPtr);
if (!TgtPtrBegin) { if (!TgtPtrBegin) {
DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
continue; continue;
} }
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin) {
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
DPxPTR(HstPtrBegin));
continue;
}
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
@ -514,6 +528,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
DP("Copying data to device failed.\n"); DP("Copying data to device failed.\n");
return OFFLOAD_FAIL; return OFFLOAD_FAIL;
} }
uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t lb = (uintptr_t) HstPtrBegin;
uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
Device.ShadowMtx.lock(); Device.ShadowMtx.lock();
@ -640,19 +655,26 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
void *HstPtrVal = args[i]; void *HstPtrVal = args[i];
void *HstPtrBegin = args_base[i]; void *HstPtrBegin = args_base[i];
void *HstPtrBase = args[idx]; void *HstPtrBase = args[idx];
bool IsLast; // unused. bool IsLast, IsHostPtr; // unused.
void *TgtPtrBase = void *TgtPtrBase =
(void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]); (void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]);
DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
void *Pointer_TgtPtrBegin = void *Pointer_TgtPtrBegin =
Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false); Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false,
IsHostPtr);
if (!Pointer_TgtPtrBegin) { if (!Pointer_TgtPtrBegin) {
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
DPxPTR(HstPtrVal)); DPxPTR(HstPtrVal));
continue; continue;
} }
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin) {
DP("Unified memory is active, no need to map lambda captured"
"variable (" DPxMOD ")\n", DPxPTR(HstPtrVal));
continue;
}
DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin, int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin,
@ -668,7 +690,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
void *HstPtrBase = args_base[i]; void *HstPtrBase = args_base[i];
void *TgtPtrBegin; void *TgtPtrBegin;
ptrdiff_t TgtBaseOffset; ptrdiff_t TgtBaseOffset;
bool IsLast; // unused. bool IsLast, IsHostPtr; // unused.
if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) { if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
DP("Forwarding first-private value " DPxMOD " to the target construct\n", DP("Forwarding first-private value " DPxMOD " to the target construct\n",
DPxPTR(HstPtrBase)); DPxPTR(HstPtrBase));
@ -705,14 +727,14 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
} }
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
false); false, IsHostPtr);
TgtBaseOffset = 0; // no offset for ptrs. TgtBaseOffset = 0; // no offset for ptrs.
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to " DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
"object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
DPxPTR(HstPtrBase)); DPxPTR(HstPtrBase));
} else { } else {
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast, TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
false); false, IsHostPtr);
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
#ifdef OMPTARGET_DEBUG #ifdef OMPTARGET_DEBUG
void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);

View File

@ -43,4 +43,4 @@ int main() {
{} {}
return 0; return 0;
} }

View File

@ -0,0 +1,164 @@
// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
#include <stdio.h>
#include <omp.h>
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL
extern void __tgt_register_requires(int64_t);
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
#pragma omp requires unified_shared_memory
#define N 1024
void init(int A[], int B[], int C[]) {
for (int i = 0; i < N; ++i) {
A[i] = 0;
B[i] = 1;
C[i] = i;
}
}
int main(int argc, char *argv[]) {
const int device = omp_get_default_device();
// Manual registration of requires flags for Clang versions
// that do not support requires.
__tgt_register_requires(8);
// CHECK: Initial device: -10
printf("Initial device: %d\n", omp_get_initial_device());
//
// Target alloc & target memcpy
//
int A[N], B[N], C[N];
// Init
init(A, B, C);
int *pA, *pB, *pC;
// map ptrs
pA = &A[0];
pB = &B[0];
pC = &C[0];
int *d_A = (int *)omp_target_alloc(N * sizeof(int), device);
int *d_B = (int *)omp_target_alloc(N * sizeof(int), device);
int *d_C = (int *)omp_target_alloc(N * sizeof(int), device);
// CHECK: omp_target_alloc succeeded
printf("omp_target_alloc %s\n", d_A && d_B && d_C ? "succeeded" : "failed");
omp_target_memcpy(d_B, pB, N * sizeof(int), 0, 0, device,
omp_get_initial_device());
omp_target_memcpy(d_C, pC, N * sizeof(int), 0, 0, device,
omp_get_initial_device());
#pragma omp target is_device_ptr(d_A, d_B, d_C) device(device)
{
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < N; i++) {
d_A[i] = d_B[i] + d_C[i] + 1;
}
}
omp_target_memcpy(pA, d_A, N * sizeof(int), 0, 0, omp_get_initial_device(),
device);
// CHECK: Test omp_target_memcpy: Succeeded
int fail = 0;
for (int i = 0; i < N; ++i) {
if (A[i] != i + 2)
fail++;
}
if (fail) {
printf("Test omp_target_memcpy: Failed\n");
} else {
printf("Test omp_target_memcpy: Succeeded\n");
}
//
// target_is_present and target_associate/disassociate_ptr
//
init(A, B, C);
// CHECK: B is not present, associating it...
// CHECK: omp_target_associate_ptr B succeeded
if (!omp_target_is_present(B, device)) {
printf("B is not present, associating it...\n");
int rc = omp_target_associate_ptr(B, d_B, N * sizeof(int), 0, device);
printf("omp_target_associate_ptr B %s\n", !rc ? "succeeded" : "failed");
}
// CHECK: C is not present, associating it...
// CHECK: omp_target_associate_ptr C succeeded
if (!omp_target_is_present(C, device)) {
printf("C is not present, associating it...\n");
int rc = omp_target_associate_ptr(C, d_C, N * sizeof(int), 0, device);
printf("omp_target_associate_ptr C %s\n", !rc ? "succeeded" : "failed");
}
// CHECK: Inside target data: A is not present
// CHECK: Inside target data: B is present
// CHECK: Inside target data: C is present
#pragma omp target data map(from : B, C) device(device)
{
printf("Inside target data: A is%s present\n",
omp_target_is_present(A, device) ? "" : " not");
printf("Inside target data: B is%s present\n",
omp_target_is_present(B, device) ? "" : " not");
printf("Inside target data: C is%s present\n",
omp_target_is_present(C, device) ? "" : " not");
#pragma omp target map(from : A) device(device)
{
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < N; i++)
A[i] = B[i] + C[i] + 1;
}
}
// CHECK: B is present, disassociating it...
// CHECK: omp_target_disassociate_ptr B succeeded
// CHECK: C is present, disassociating it...
// CHECK: omp_target_disassociate_ptr C succeeded
if (omp_target_is_present(B, device)) {
printf("B is present, disassociating it...\n");
int rc = omp_target_disassociate_ptr(B, device);
printf("omp_target_disassociate_ptr B %s\n", !rc ? "succeeded" : "failed");
}
if (omp_target_is_present(C, device)) {
printf("C is present, disassociating it...\n");
int rc = omp_target_disassociate_ptr(C, device);
printf("omp_target_disassociate_ptr C %s\n", !rc ? "succeeded" : "failed");
}
// CHECK: Test omp_target_associate_ptr: Succeeded
fail = 0;
for (int i = 0; i < N; ++i) {
if (A[i] != i + 2)
fail++;
}
if (fail) {
printf("Test omp_target_associate_ptr: Failed\n");
} else {
printf("Test omp_target_associate_ptr: Succeeded\n");
}
omp_target_free(d_A, device);
omp_target_free(d_B, device);
omp_target_free(d_C, device);
printf("Done!\n");
return 0;
}

View File

@ -0,0 +1,114 @@
// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
#include <stdio.h>
#include <omp.h>
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL
extern void __tgt_register_requires(int64_t);
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
#pragma omp requires unified_shared_memory
#define N 1024
int main(int argc, char *argv[]) {
int fails;
void *host_alloc, *device_alloc;
void *host_data, *device_data;
int *alloc = (int *)malloc(N * sizeof(int));
int data[N];
// Manual registration of requires flags for Clang versions
// that do not support requires.
__tgt_register_requires(8);
for (int i = 0; i < N; ++i) {
alloc[i] = 10;
data[i] = 1;
}
host_data = &data[0];
host_alloc = &alloc[0];
// implicit mapping of data
#pragma omp target map(tofrom : device_data, device_alloc)
{
device_data = &data[0];
device_alloc = &alloc[0];
for (int i = 0; i < N; i++) {
alloc[i] += 1;
data[i] += 1;
}
}
// CHECK: Address of alloc on device matches host address.
if (device_alloc == host_alloc)
printf("Address of alloc on device matches host address.\n");
// CHECK: Address of data on device matches host address.
if (device_data == host_data)
printf("Address of data on device matches host address.\n");
// On the host, check that the arrays have been updated.
// CHECK: Alloc device values updated: Succeeded
fails = 0;
for (int i = 0; i < N; i++) {
if (alloc[i] != 11)
fails++;
}
printf("Alloc device values updated: %s\n",
(fails == 0) ? "Succeeded" : "Failed");
// CHECK: Data device values updated: Succeeded
fails = 0;
for (int i = 0; i < N; i++) {
if (data[i] != 2)
fails++;
}
printf("Data device values updated: %s\n",
(fails == 0) ? "Succeeded" : "Failed");
//
// Test that updates on the host snd on the device are both visible.
//
// Update on the host.
for (int i = 0; i < N; ++i) {
alloc[i] += 1;
data[i] += 1;
}
#pragma omp target
{
// CHECK: Alloc host values updated: Succeeded
fails = 0;
for (int i = 0; i < N; i++) {
if (alloc[i] != 12)
fails++;
}
printf("Alloc host values updated: %s\n",
(fails == 0) ? "Succeeded" : "Failed");
// CHECK: Data host values updated: Succeeded
fails = 0;
for (int i = 0; i < N; i++) {
if (data[i] != 3)
fails++;
}
printf("Data host values updated: %s\n",
(fails == 0) ? "Succeeded" : "Failed");
}
free(alloc);
printf("Done!\n");
return 0;
}